Mercurial > ~dholland > hg > swallowtail > index.cgi
view database/schema/classify.sql @ 51:ef6d572c4e1e
switch to python3 style print()
author | David A. Holland |
---|---|
date | Sat, 02 Apr 2022 18:14:40 -0400 |
parents | cd36b49f4437 |
children | 40f64a96481f |
line wrap: on
line source
--
-- Classification.
--
--
-- Sorting and classifying bug reports usefully is a nontrivial
-- problem. For this reason, rather than bake in any particular
-- taxonomy, we allow multiple arbitrary classification schemes,
-- all of which are created equal. (Administratively, some will
-- be more equal than others, but that does not concern the
-- database.)
--
-- There are several classification schemes/taxonomies that already
-- exist or have been invented but not yet deployed. These fall into
-- the following categories:
--
-- 1. Hierarchical taxonomy. The only existing hierarchical taxonomy
-- is the one dholland uses in the out-of-GNATS PR lists, which
-- classifies PRs by location in the system. A second projected one is
-- classification by consequences (crashes, hangs, etc.) The possible
-- classifications are defined in advance and stored in the database.
--
-- 2. Flat taxonomy, that is, a selection from an enumeration of
-- values. Existing schemes of this type include:
--
--    * the old GNATS scheme of categories (bin, kern, lib, etc.)
--    * the old GNATS scheme of classes (sw-bug, doc-bug, etc.)
--    * the old GNATS severity field (critical, serious, non-critical)
--    * the old GNATS priority field (high, medium, low)
--
-- For these the possible values are defined in advance and stored in
-- the database.
--
-- 3. String value. Most of the time this is really a flat taxonomy
-- where the number of possible values is large or changes dynamically
-- with external circumstances, or both, such that storing a copy of
-- the legal values in the database would create a maintenance hassle.
-- There are none of these at present; the only projected one at the
-- moment is FreeBSD's idea of "the nearest man page to where the
-- problem appears to be".
--
-- 4. Systems of tags, that is, zero or more selections from a list
-- of possible values. Current schemes of this type include the
-- [456]-ONLY tags, the 6-CRITICAL/6-IMPORTANT/etc. release branch
-- tags, the STUCK tag, EASY, PATCH, PULLUPS-NEEDED, etc. Some of
-- these should be separate flat taxonomy schemes rather than tags,
-- and some of them should maybe be states rather than tags too.
-- (And, perhaps some of the current states should be tags...)
-- In addition we want to allow every developer to have their own
-- private tags that aren't exposed. And, we might want to have
-- project-only tags that other developers can see but users can't.
-- (It remains to be seen if that's really a good idea.)
--
-- Note also that a list of PRs is equivalent to a tag on those PRs.
--
-- It is also possible that there may be additional classifications
-- (possibly of any of the above types, but most likely tags) that are
-- views rather than data, that is, derived from other information.
-- One example of this is "PRs in feedback more than 6 months".
--
--
-- We assume that new classification schemes will be added on the fly,
-- not frequently, but frequently enough that the list of schemes
-- should not be hard-coded into programs that access the database,
-- and we don't want to have to create a new table for each new scheme
-- either.
--
-- Therefore, the way I'll do this is to create one table (or rather,
-- one family of tables) for each type of classification scheme, and
-- make them able to handle arbitrary instances.
--
-- For hierarchical schemes there is no point trying to encode the
-- hierarchical structure in SQL; that is a waste of time. Instead
-- we'll expand the tree of allowed values and rely on the access
-- software to present the schemes sensibly.
--
-- It remains unclear what views ought to be defined.
--
--
-- Notes on the representations:
--    * a scheme is "total" if it should be defined on all PRs
--    * if the data contains a null value for a particular scheme,
--      that means "not specified yet".
--    * the hierarchical and flat schemes are no different schema-
--      wise but are separate to allow different handling in software.
--    * the ordering field in the values tables should be used so that
--      ORDER BY ordering ASC produces the desired output order
--      of the legal values.
--    * the obsolete field for values is for entries that are allowed
--      to still exist in the database but that should not be used
--      with new PRs or for new classifications of old PRs.
--    * the ordering field for the classifications themselves should
--      be used so that ORDER BY ordering ASC produces the
--      desired output order of the classification schemes. The
--      numbers are global across classification scheme types so
--      schemes of the same type do not need to be sorted together.
--

------------------------------------------------------------
-- Hierarchical schemes.

-- names and properties of the hierarchical schemes
CREATE TABLE hierclass_names (
    name text primary key,
    ordering int not null,
    total boolean not null,
    description text not null
)
WITHOUT OIDS;

-- allowed values of the hierarchical schemes
CREATE TABLE hierclass_values (
    scheme text not null references hierclass_names (name),
    value text not null,
    ordering int not null,
    obsolete boolean not null,
    description text not null,
    primary key (scheme, value)
)
WITHOUT OIDS;

-- classification of PRs according to the hierarchical schemes
-- (at most one row per PR/scheme pair)
CREATE TABLE hierclass_data (
    pr bigint not null references PRs (id),
    scheme text not null references hierclass_names (name),
    value text ,
    primary key (pr, scheme),
    --
    -- Ok, the intended semantics here are:
    --    * For PRs that the classification doesn't apply to, there
    --      should be no row at all in the *class_data table.
    --    * For PRs where the classification is undetermined or
    --      unassigned, the value column should be null, and if
    --      that's allowed there should be a null entry in
    --      the *class_values table.
    -- I believe "MATCH SIMPLE" to be capable of enforcing this,
    -- but that's from reading postgres docs and not from actually
    -- checking it. From the description, "MATCH FULL" will do the
    -- wrong thing, and "MATCH PARTIAL" isn't supported.
    --
    -- Concretely: scheme is NOT NULL here, so under MATCH FULL a
    -- row with a null value would always be rejected (MATCH FULL
    -- requires the key columns to be all null or all non-null).
    -- Under MATCH SIMPLE a null value means the composite key is
    -- not checked; scheme is still checked by its own reference
    -- to hierclass_names above.
    --
    -- NOTE(review): value is declared not null (and is part of the
    -- primary key) in hierclass_values, so a "null entry" in the
    -- values table cannot actually be stored as the schema stands.
    --
    foreign key (scheme, value) references hierclass_values (scheme, value)
        MATCH SIMPLE
        ON DELETE RESTRICT
        ON UPDATE RESTRICT
)
WITHOUT OIDS;

------------------------------------------------------------
-- Enumerated flat schemes.

-- names and properties of the flat schemes
CREATE TABLE flatclass_names (
    name text primary key,
    ordering int not null,
    total boolean not null,
    description text not null
)
WITHOUT OIDS;

-- allowed values of the flat schemes
CREATE TABLE flatclass_values (
    scheme text not null references flatclass_names (name),
    value text not null,
    ordering int not null,
    obsolete boolean not null,
    description text not null,
    primary key (scheme, value)
)
WITHOUT OIDS;

-- classification of PRs according to the flat schemes
-- (at most one row per PR/scheme pair)
CREATE TABLE flatclass_data (
    pr bigint not null references PRs (id),
    scheme text not null references flatclass_names (name),
    value text ,
    primary key (pr, scheme),
    -- as above: MATCH SIMPLE so that a null value ("not specified
    -- yet") is accepted alongside a non-null scheme
    foreign key (scheme, value) references flatclass_values (scheme, value)
        MATCH SIMPLE
        ON DELETE RESTRICT
        ON UPDATE RESTRICT
)
WITHOUT OIDS;

------------------------------------------------------------
-- Text schemes.

-- names and properties of the text schemes
CREATE TABLE textclass_names (
    name text primary key,
    ordering int not null,
    total boolean not null,
    description text not null
)
WITHOUT OIDS;

-- classification of PRs according to the text schemes
-- (free-form value, so there is no table of allowed values)
CREATE TABLE textclass_data (
    pr bigint not null references PRs (id),
    scheme text not null references textclass_names (name),
    value text ,
    primary key (pr, scheme)
)
WITHOUT OIDS;

------------------------------------------------------------
-- Tag schemes.
-- names and properties of the tag schemes
-- (total does not make sense here)
CREATE TABLE tagclass_names (
    name text primary key,
    ordering int not null,
    description text not null
)
WITHOUT OIDS;

-- allowed values of the tag schemes
-- (each PR can reference zero or more of these)
CREATE TABLE tagclass_values (
    scheme text not null references tagclass_names (name),
    value text not null,
    ordering int not null,
    obsolete boolean not null,
    description text not null,
    primary key (scheme, value)
)
WITHOUT OIDS;

-- classification of PRs according to the tag schemes
-- (each PR/scheme pair is listed once for each tag attached to it)
CREATE TABLE tagclass_data (
    pr bigint not null references PRs (id),
    scheme text not null references tagclass_names (name),
    value text ,
    -- Same foreign key arrangement as the hierarchical and flat
    -- schemes. scheme is NOT NULL, so MATCH FULL would reject any
    -- row with a null value; MATCH SIMPLE accepts it and skips the
    -- composite check for that row.
    foreign key (scheme, value) references tagclass_values (scheme, value)
        MATCH SIMPLE
        ON DELETE RESTRICT
        ON UPDATE RESTRICT
)
WITHOUT OIDS;

-- view where tags are collected into a string
-- string_agg() concatenates the value strings, separated by a
-- single space, in the order given by tagclass_values.ordering
CREATE VIEW tagclass_stringdata AS
    SELECT
        d.pr as pr,
        d.scheme as scheme,
        string_agg(d.value, ' ' ORDER BY v.ordering) as value
    FROM tagclass_data d
    INNER JOIN tagclass_values v
        ON d.scheme = v.scheme
       AND d.value = v.value
    GROUP BY d.pr, d.scheme
;

------------------------------------------------------------
-- some views

-- views that pull in the ordering fields.
--
-- (If you use only one of the ordering fields, as is likely to be
-- the case in practice, I think we can rely on the query optimizer
-- to drop the join that collects in the other ordering field. If
-- not, maybe this should be restructured.)
--
-- XXX what do these do when _data.value is null?
-- (The hier/flat/tag views join to the *_values table with a plain
-- equality on value. A null value never compares equal, so such
-- rows match nothing and are left out of those views.)
-- hierarchical classifications, with the scheme's and the value's
-- ordering numbers attached. Rows whose value is null have no
-- matching hierclass_values row and so do not appear here.
CREATE VIEW hierclass_data_ordered AS
    SELECT
        d.pr as pr,
        d.scheme as scheme,
        n.ordering as schemeordering,
        d.value as value,
        v.ordering as valueordering
    FROM hierclass_data d
    INNER JOIN hierclass_names n
        ON d.scheme = n.name
    INNER JOIN hierclass_values v
        ON d.scheme = v.scheme
       AND d.value = v.value
;

-- same thing for the flat classifications.
CREATE VIEW flatclass_data_ordered AS
    SELECT
        d.pr as pr,
        d.scheme as scheme,
        n.ordering as schemeordering,
        d.value as value,
        v.ordering as valueordering
    FROM flatclass_data d
    INNER JOIN flatclass_names n
        ON d.scheme = n.name
    INNER JOIN flatclass_values v
        ON d.scheme = v.scheme
       AND d.value = v.value
;

--
-- XXX what I'd like to do here is produce textclass_data.value as
-- valueordering; then sorting by valueordering would produce the
-- desired results (the entries sorted by text order of the values).
-- That produces a type conflict though if we combine this view
-- with the other ordered views where the ordering is a number.
--
-- Next best would be to order by textclass_data.value in this view
-- and produce the row number of the result (or some other fresh
-- sequence) as valueordering. However, I don't think that's possible.
-- (NOTE(review): a window function such as row_number() OVER
-- (ORDER BY ...) might provide this; not tried here.)
--
-- What's here (using a fixed value of 1) will run but it's not
-- particularly desirable.
--
-- text classifications with the scheme ordering attached.
-- valueordering is the constant 1 for every row; there is no
-- values table to take an ordering from.
CREATE VIEW textclass_data_ordered AS
    SELECT
        d.pr as pr,
        d.scheme as scheme,
        n.ordering as schemeordering,
        d.value as value,
        1 as valueordering
    FROM textclass_data d
    INNER JOIN textclass_names n
        ON d.scheme = n.name
;

-- tag classifications, one row per attached tag, with the scheme's
-- and the tag's ordering numbers attached.
CREATE VIEW tagclass_data_ordered AS
    SELECT
        d.pr as pr,
        d.scheme as scheme,
        n.ordering as schemeordering,
        d.value as value,
        v.ordering as valueordering
    FROM tagclass_data d
    INNER JOIN tagclass_names n
        ON d.scheme = n.name
    INNER JOIN tagclass_values v
        ON d.scheme = v.scheme
       AND d.value = v.value
;

-- ordered version of tagclass_stringdata
--
-- Note that schemeordering is uniquely defined by the group-by
-- and the min() on it is there to satisfy the cookiemonster.
-- It is aliased back to schemeordering so that this view exposes
-- the same column names as the other *_ordered views (without the
-- alias the column would be called "min").
--
-- valueordering is the smallest ordering number among the tags
-- collected into the string.
--
CREATE VIEW tagclass_stringdata_ordered AS
    SELECT
        pr,
        scheme,
        min(schemeordering) as schemeordering,
        string_agg(value, ' ' ORDER BY valueordering) as value,
        min(valueordering) as valueordering
    FROM tagclass_data_ordered
    GROUP BY pr, scheme
;

-- a view that combines all the classification data.
--
-- The branches are matched up by column position, so each one lists
-- its columns explicitly in the same order. UNION (not UNION ALL)
-- is kept, so rows that are identical across branches are merged.
CREATE VIEW classifications AS
    SELECT pr, scheme, schemeordering, value, valueordering
    FROM hierclass_data_ordered
    UNION
    SELECT pr, scheme, schemeordering, value, valueordering
    FROM flatclass_data_ordered
    UNION
    SELECT pr, scheme, schemeordering, value, valueordering
    FROM textclass_data_ordered
    UNION
    SELECT pr, scheme, schemeordering, value, valueordering
    FROM tagclass_stringdata_ordered
    ORDER BY schemeordering
;