1 | --
|
---|
2 | -- conventions:
|
---|
3 | -- <table_name>_id is primary internal id (usually autogenerated)
|
---|
4 | -- Authors: Ewan Birney, Elia Stupka, Hilmar Lapp, Aaron Mackey
|
---|
5 | --
|
---|
6 | -- Copyright Mark Schreiber. You may use, modify, and distribute this code under
|
---|
7 | -- the terms of the Creative Commons license.
|
---|
8 | --
|
---|
9 | -- comments to biosql - biosql-l@open-bio.org
|
---|
10 | --
|
---|
11 | -- Migration of the MySQL schema to InnoDB by Hilmar Lapp <hlapp at gmx.net>
|
---|
12 | -- Post-Cape Town changes by Hilmar Lapp.
|
---|
13 | -- Singapore changes by Hilmar Lapp and Aaron Mackey.
|
---|
14 | -- Migration of the
|
---|
15 | --
|
---|
16 | -- Note that some aspects of the schema like uniqueness constraints
|
---|
17 | -- may be changed to best suit your requirements. Search for the tag
|
---|
18 | -- CONFIG and read the documentation you find there.
|
---|
19 | --
|
---|
20 | --
|
---|
21 | -- The Biosql database has bioentries. That is about it.
|
---|
22 | CREATE TABLE biodatabase (
|
---|
23 | biodatabase_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
|
---|
24 | name VARCHAR ( 128 ) NOT NULL UNIQUE,
|
---|
25 | authority VARCHAR ( 128 ) ,
|
---|
26 | description VARCHAR (30000)
|
---|
27 | ) ;
|
---|
28 |
|
---|
29 | CREATE INDEX db_auth on biodatabase ( authority );
|
---|
30 |
|
---|
31 | -- we could insist that taxa are NCBI taxon id, but on reflection I made this
|
---|
32 | -- an optional extra line, as many flat file formats do not have the NCBI id
|
---|
33 | --
|
---|
34 | -- no organelle/sub species
|
---|
35 | --
|
---|
36 | -- this corresponds to the node table of the NCBI taxonomy database
|
---|
37 | -- left_value, right_value implement a nested sets model;
|
---|
38 | -- see http://www.oreillynet.com/pub/a/network/2002/11/27/bioconf.html
|
---|
39 | -- or Joe Celko's 'SQL for smarties' for more information.
|
---|
40 | CREATE TABLE taxon (
|
---|
41 | taxon_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
|
---|
42 | ncbi_taxon_id INTEGER NOT NULL UNIQUE,
|
---|
43 | parent_taxon_id INTEGER ,
|
---|
44 | node_rank VARCHAR ( 32 ) ,
|
---|
45 | genetic_code SMALLINT ,
|
---|
46 | mito_genetic_code SMALLINT ,
|
---|
47 | left_value INTEGER NOT NULL UNIQUE,
|
---|
48 | right_value INTEGER NOT NULL UNIQUE
|
---|
49 | );
|
---|
50 | CREATE INDEX taxparent ON taxon ( parent_taxon_id );
|
---|
51 |
|
---|
52 | -- corresponds to the names table of the NCBI taxonomy databaase
|
---|
53 | CREATE TABLE taxon_name (
|
---|
54 | taxon_id INTEGER NOT NULL ,
|
---|
55 | name VARCHAR ( 255 ) NOT NULL ,
|
---|
56 | name_class VARCHAR ( 32 ) NOT NULL,
|
---|
57 | UNIQUE ( name , name_class, taxon_id )
|
---|
58 | ) ;
|
---|
59 |
|
---|
60 | CREATE INDEX taxnametaxonid ON taxon_name ( taxon_id );
|
---|
61 | CREATE INDEX taxnamename ON taxon_name ( name );
|
---|
62 |
|
---|
63 | -- this is where the namespace (controlled vocabulary) ontology terms live
|
---|
64 | -- we chose to have a separate table for this instead of reusing biodatabase
|
---|
65 | CREATE TABLE ontology (
|
---|
66 | ontology_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
|
---|
67 | name VARCHAR ( 32 ) NOT NULL UNIQUE,
|
---|
68 | definition VARCHAR (30000)
|
---|
69 |
|
---|
70 | ) ;
|
---|
71 |
|
---|
72 | -- any controlled vocab term, everything from full ontology
|
---|
73 | -- terms eg GO IDs to the various keys allowed as qualifiers
|
---|
74 | CREATE TABLE term (
|
---|
75 | term_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
76 | name VARCHAR ( 255 ) NOT NULL ,
|
---|
77 | definition VARCHAR(30000) ,
|
---|
78 | identifier VARCHAR ( 40 ) NOT NULL,
|
---|
79 | is_obsolete CHAR ( 1 ) NOT NULL,
|
---|
80 | ontology_id INTEGER NOT NULL,
|
---|
81 | UNIQUE ( name , ontology_id , is_obsolete ) ,
|
---|
82 | UNIQUE ( identifier )
|
---|
83 | ) ;
|
---|
84 |
|
---|
85 | CREATE INDEX term_ont ON term ( ontology_id );
|
---|
86 |
|
---|
87 | -- ontology terms have synonyms, here is how to store them.
|
---|
88 | -- Synonym is a reserved word in many RDBMSs, so the column synonym
|
---|
89 | -- may eventually be renamed to name.
|
---|
90 | CREATE TABLE term_synonym (
|
---|
91 | synonym VARCHAR(255) NOT NULL,
|
---|
92 | term_id INTEGER NOT NULL,
|
---|
93 | PRIMARY KEY ( term_id , synonym ) ) ;
|
---|
94 |
|
---|
95 | -- ontology terms to dbxref association: ontology terms have dbxrefs
|
---|
96 | CREATE TABLE term_dbxref (
|
---|
97 | term_id INTEGER NOT NULL ,
|
---|
98 | dbxref_id INTEGER NOT NULL ,
|
---|
99 | rank INTEGER ,
|
---|
100 | PRIMARY KEY ( term_id , dbxref_id ) ) ;
|
---|
101 |
|
---|
102 | CREATE INDEX trmdbxref_dbxrefid ON term_dbxref ( dbxref_id );
|
---|
103 |
|
---|
104 | -- relationship between controlled vocabulary / ontology term
|
---|
105 | -- we use subject/predicate/object but this could also
|
---|
106 | -- be thought of as child/relationship-type/parent.
|
---|
107 | -- the subject/predicate/object naming is better as we
|
---|
108 | -- can think of the graph as composed of statements.
|
---|
109 | --
|
---|
110 | -- we also treat the relationshiptypes / predicates as
|
---|
111 | -- controlled terms in themselves; this is quite useful
|
---|
112 | -- as a lot of systems (eg GO) will soon require
|
---|
113 | -- ontologies of relationship types (eg subtle differences
|
---|
114 | -- in the partOf relationship)
|
---|
115 | --
|
---|
116 | -- this table probably won't be filled for a while, the core
|
---|
117 | -- will just treat ontologies as flat lists of terms
|
---|
118 | CREATE TABLE term_relationship (
|
---|
119 | term_relationship_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
120 | subject_term_id INTEGER NOT NULL ,
|
---|
121 | predicate_term_id INTEGER NOT NULL ,
|
---|
122 | object_term_id INTEGER NOT NULL ,
|
---|
123 | ontology_id INTEGER NOT NULL ,
|
---|
124 | UNIQUE ( subject_term_id , predicate_term_id , object_term_id , ontology_id ) ) ;
|
---|
125 |
|
---|
126 | CREATE INDEX trmrel_predicateid ON term_relationship ( predicate_term_id );
|
---|
127 | CREATE INDEX trmrel_objectid ON term_relationship ( object_term_id );
|
---|
128 | CREATE INDEX trmrel_ontid ON term_relationship ( ontology_id );
|
---|
129 | -- CONFIG: you may want to add this if you can't get the optimizer to
|
---|
130 | -- use the composite index for the initial keys
|
---|
131 | --CREATE INDEX trmrel_subjectid ON term_relationship(subject_term_id);
|
---|
132 |
|
---|
133 | -- This lets one associate a single term with a term_relationship
|
---|
134 | -- effecively allowing us to treat triples as 1st class terms.
|
---|
135 | --
|
---|
136 | -- At this point this table is only supported in Biojava. If you want
|
---|
137 | -- to know more about the rationale and idea behind it, read the
|
---|
138 | -- following article that Mat Pocock posted to the mailing list:
|
---|
139 | -- http://www.open-bio.org/pipermail/biosql-l/2003-October/000455.html
|
---|
140 | CREATE TABLE term_relationship_term (
|
---|
141 | term_relationship_id INTEGER NOT NULL PRIMARY KEY ,
|
---|
142 | term_id INTEGER UNIQUE NOT NULL
|
---|
143 | );
|
---|
144 |
|
---|
145 | -- the infamous transitive closure table on ontology term relationships
|
---|
146 | -- this is a warehouse approach - you will need to update this regularly
|
---|
147 | --
|
---|
148 | -- the triple of (subject, predicate, object) is the same as for ontology
|
---|
149 | -- relationships, with the exception of predicate being the greatest common
|
---|
150 | -- denominator of the relationships types visited in the path (i.e., if
|
---|
151 | -- relationship type A is-a relationship type B, the greatest common
|
---|
152 | -- denominator for path containing both types A and B is B)
|
---|
153 | --
|
---|
154 | -- See the GO database or Chado schema for other (and possibly better
|
---|
155 | -- documented) implementations of the transitive closure table approach.
|
---|
156 | CREATE TABLE term_path (
|
---|
157 | term_path_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
158 | subject_term_id INTEGER NOT NULL ,
|
---|
159 | predicate_term_id INTEGER NOT NULL ,
|
---|
160 | object_term_id INTEGER NOT NULL ,
|
---|
161 | ontology_id INTEGER NOT NULL ,
|
---|
162 | distance INTEGER NOT NULL,
|
---|
163 | UNIQUE ( subject_term_id , predicate_term_id , object_term_id , ontology_id , distance ) ) ;
|
---|
164 |
|
---|
165 | CREATE INDEX trmpath_predicateid ON term_path ( predicate_term_id );
|
---|
166 | CREATE INDEX trmpath_objectid ON term_path ( object_term_id );
|
---|
167 | CREATE INDEX trmpath_ontid ON term_path ( ontology_id );
|
---|
168 | -- CONFIG: you may want to add this if you can't get the optimizer to
|
---|
169 | -- use the composite index for the initial keys
|
---|
170 | --CREATE INDEX trmpath_subjectid ON term_path(subject_term_id);
|
---|
171 |
|
---|
172 | -- we can be a bioentry without a biosequence, but not visa-versa
|
---|
173 | -- most things are going to be keyed off bioentry_id
|
---|
174 | --
|
---|
175 | -- accession is the stable id, display_id is a potentially volatile,
|
---|
176 | -- human readable name.
|
---|
177 | --
|
---|
178 | -- Version may be unknown, may be undefined, or may not exist for a certain
|
---|
179 | -- accession or database (namespace). We require it here to avoid RDBMS-
|
---|
180 | -- dependend enforcement variants (version is in a compound alternative key),
|
---|
181 | -- and to simplify query construction for UK look-ups. If there is no version
|
---|
182 | -- the convention is to put 0 (zero) here. Likewise, a record with a version
|
---|
183 | -- of zero means the version is to be interpreted as NULL.
|
---|
184 | --
|
---|
185 | -- not all entries have a taxon, but many do.
|
---|
186 | --
|
---|
187 | -- one bioentry only has one taxon! (weirdo chimerias are not handled. tough)
|
---|
188 | --
|
---|
189 | -- Name maps to display_id in bioperl. We have a different column name
|
---|
190 | -- here to avoid confusion with the naming convention for foreign keys.
|
---|
191 | CREATE TABLE bioentry (
|
---|
192 | bioentry_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
193 | biodatabase_id INTEGER NOT NULL ,
|
---|
194 | taxon_id INTEGER ,
|
---|
195 | name VARCHAR ( 40 ) NOT NULL ,
|
---|
196 | accession VARCHAR ( 40 ) NOT NULL ,
|
---|
197 | identifier VARCHAR ( 40 ) NOT NULL,
|
---|
198 | division VARCHAR ( 6 ) ,
|
---|
199 | description VARCHAR( 4096 ) ,
|
---|
200 | version INTEGER NOT NULL ,
|
---|
201 | UNIQUE ( accession , biodatabase_id , version ) ,
|
---|
202 | -- CONFIG: uncomment one (and only one) of the two lines below. The
|
---|
203 | -- first puts a uniqueness constraint on the identifier column alone;
|
---|
204 | -- the other one puts a uniqueness constraint on identifier only
|
---|
205 | -- within a namespace.
|
---|
206 | -- UNIQUE ( identifier )
|
---|
207 | UNIQUE ( identifier , biodatabase_id )
|
---|
208 | ) ;
|
---|
209 |
|
---|
210 | CREATE INDEX bioentry_name ON bioentry ( name );
|
---|
211 | CREATE INDEX bioentry_db ON bioentry ( biodatabase_id );
|
---|
212 | CREATE INDEX bioentry_tax ON bioentry ( taxon_id );
|
---|
213 |
|
---|
214 | --
|
---|
215 | -- bioentry-bioentry relationships: these are typed
|
---|
216 | --
|
---|
217 | CREATE TABLE bioentry_relationship (
|
---|
218 | bioentry_relationship_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
219 | object_bioentry_id INTEGER NOT NULL ,
|
---|
220 | subject_bioentry_id INTEGER NOT NULL ,
|
---|
221 | term_id INTEGER NOT NULL ,
|
---|
222 | rank INTEGER ,
|
---|
223 | UNIQUE ( object_bioentry_id , subject_bioentry_id , term_id )
|
---|
224 | ) ;
|
---|
225 |
|
---|
226 | CREATE INDEX bioentryrel_trm ON bioentry_relationship ( term_id );
|
---|
227 | CREATE INDEX bioentryrel_child ON bioentry_relationship (subject_bioentry_id);
|
---|
228 | -- CONFIG: you may want to add this if you can't get the optimizer to
|
---|
229 | -- use the composite index for the initial keys
|
---|
230 | --CREATE INDEX bioentryrel_parent ON bioentry_relationship(object_bioentry_id);
|
---|
231 |
|
---|
232 | -- for deep (depth > 1) bioentry relationship trees we need a transitive
|
---|
233 | -- closure table too
|
---|
234 | CREATE TABLE bioentry_path (
|
---|
235 | object_bioentry_id INTEGER NOT NULL ,
|
---|
236 | subject_bioentry_id INTEGER NOT NULL ,
|
---|
237 | term_id INTEGER NOT NULL ,
|
---|
238 | distance INTEGER NOT NULL,
|
---|
239 | UNIQUE ( object_bioentry_id , subject_bioentry_id , term_id , distance ) ) ;
|
---|
240 |
|
---|
241 | CREATE INDEX bioentrypath_trm ON bioentry_path ( term_id );
|
---|
242 | CREATE INDEX bioentrypath_child ON bioentry_path ( subject_bioentry_id );
|
---|
243 |
|
---|
244 | -- some bioentries will have a sequence
|
---|
245 | -- biosequence because sequence is sometimes a reserved word
|
---|
246 | CREATE TABLE biosequence (
|
---|
247 | bioentry_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
248 | version INTEGER ,
|
---|
249 | length INTEGER ,
|
---|
250 | alphabet VARCHAR ( 10 ) ,
|
---|
251 | seq CLOB
|
---|
252 | ) ;
|
---|
253 |
|
---|
254 | -- database cross-references (e.g., GenBank:AC123456.1)
|
---|
255 | --
|
---|
256 | -- Version may be unknown, may be undefined, or may not exist for a certain
|
---|
257 | -- accession or database (namespace). We require it here to avoid RDBMS-
|
---|
258 | -- dependend enforcement variants (version is in a compound alternative key),
|
---|
259 | -- and to simplify query construction for UK look-ups. If there is no version
|
---|
260 | -- the convention is to put 0 (zero) here. Likewise, a record with a version
|
---|
261 | -- of zero means the version is to be interpreted as NULL.
|
---|
262 | --
|
---|
263 | CREATE TABLE dbxref (
|
---|
264 | dbxref_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
265 | dbname VARCHAR ( 40 ) NOT NULL ,
|
---|
266 | accession VARCHAR ( 40 ) NOT NULL ,
|
---|
267 | version INTEGER NOT NULL ,
|
---|
268 | UNIQUE ( accession , dbname , version ) ) ;
|
---|
269 |
|
---|
270 | CREATE INDEX dbxref_db ON dbxref ( dbname );
|
---|
271 |
|
---|
272 | -- for roundtripping embl/genbank, we need to have the "optional ID"
|
---|
273 | -- for the dbxref.
|
---|
274 | --
|
---|
275 | -- another use of this table could be for storing
|
---|
276 | -- descriptive text for a dbxref. for example, we may want to
|
---|
277 | -- know stuff about the interpro accessions we store (without
|
---|
278 | -- importing all of interpro), so we can attach the text
|
---|
279 | -- description as a synonym
|
---|
280 | CREATE TABLE dbxref_qualifier_value (
|
---|
281 | dbxref_id INTEGER NOT NULL ,
|
---|
282 | term_id INTEGER NOT NULL ,
|
---|
283 | rank INTEGER NOT NULL DEFAULT 0 ,
|
---|
284 | value VARCHAR (30000) ,
|
---|
285 | PRIMARY KEY ( dbxref_id , term_id , rank ) ) ;
|
---|
286 |
|
---|
287 | CREATE INDEX dbxrefqual_dbx ON dbxref_qualifier_value ( dbxref_id );
|
---|
288 | CREATE INDEX dbxrefqual_trm ON dbxref_qualifier_value ( term_id );
|
---|
289 |
|
---|
290 | -- Direct dblinks. It is tempting to do this
|
---|
291 | -- from bioentry_id to bioentry_id. But that wont work
|
---|
292 | -- during updates of one database - we will have to edit
|
---|
293 | -- this table each time. Better to do the join through accession
|
---|
294 | -- and db each time. Should be almost as cheap
|
---|
295 | CREATE TABLE bioentry_dbxref (
|
---|
296 | bioentry_id INTEGER NOT NULL ,
|
---|
297 | dbxref_id INTEGER NOT NULL ,
|
---|
298 | rank INTEGER ,
|
---|
299 | PRIMARY KEY ( bioentry_id , dbxref_id ) ) ;
|
---|
300 |
|
---|
301 | CREATE INDEX dblink_dbx ON bioentry_dbxref ( dbxref_id );
|
---|
302 |
|
---|
303 | -- We can have multiple references per bioentry, but one reference
|
---|
304 | -- can also be used for the same bioentry.
|
---|
305 | --
|
---|
306 | -- No two references can reference the same reference database entry
|
---|
307 | -- (dbxref_id). This is where the MEDLINE id goes: PUBMED:123456.
|
---|
308 | CREATE TABLE reference (
|
---|
309 | reference_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
310 | dbxref_id INTEGER NOT NULL UNIQUE,
|
---|
311 | location VARCHAR (30000) NOT NULL ,
|
---|
312 | title VARCHAR (30000) ,
|
---|
313 | authors VARCHAR (30000) ,
|
---|
314 | crc VARCHAR ( 32 ) UNIQUE NOT NULL
|
---|
315 | ) ;
|
---|
316 |
|
---|
317 | -- bioentry to reference associations
|
---|
318 | CREATE TABLE bioentry_reference (
|
---|
319 | bioentry_id INTEGER NOT NULL ,
|
---|
320 | reference_id INTEGER NOT NULL ,
|
---|
321 | start_pos INTEGER ,
|
---|
322 | end_pos INTEGER ,
|
---|
323 | rank INTEGER NOT NULL DEFAULT 0 ,
|
---|
324 | PRIMARY KEY ( bioentry_id , reference_id , rank ) ) ;
|
---|
325 |
|
---|
326 | CREATE INDEX bioentryref_ref ON bioentry_reference ( reference_id );
|
---|
327 |
|
---|
328 | -- We can have multiple comments per seqentry, and
|
---|
329 | -- comments can have embedded '\n' characters
|
---|
330 | CREATE TABLE comment (
|
---|
331 | comment_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
|
---|
332 | bioentry_id INTEGER NOT NULL ,
|
---|
333 | comment_text VARCHAR (30000) NOT NULL ,
|
---|
334 | rank INTEGER NOT NULL DEFAULT 0 ,
|
---|
335 | UNIQUE ( bioentry_id , rank ) ) ;
|
---|
336 |
|
---|
337 | -- tag/value and ontology term annotation for bioentries goes here
|
---|
338 | CREATE TABLE bioentry_qualifier_value (
|
---|
339 | bioentry_id INTEGER NOT NULL ,
|
---|
340 | term_id INTEGER NOT NULL ,
|
---|
341 | value VARCHAR (30000) ,
|
---|
342 | rank INTEGER NOT NULL DEFAULT 0 ,
|
---|
343 | UNIQUE ( bioentry_id , term_id , rank ) ) ;
|
---|
344 |
|
---|
345 | CREATE INDEX bioentryqual_trm ON bioentry_qualifier_value ( term_id );
|
---|
346 |
|
---|
347 | -- feature table. We cleanly handle
|
---|
348 | -- - simple locations
|
---|
349 | -- - split locations
|
---|
350 | -- - split locations on remote sequences
|
---|
351 | CREATE TABLE seqfeature (
|
---|
352 | seqfeature_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
353 | bioentry_id INTEGER NOT NULL ,
|
---|
354 | type_term_id INTEGER NOT NULL ,
|
---|
355 | source_term_id INTEGER NOT NULL ,
|
---|
356 | display_name VARCHAR ( 64 ) ,
|
---|
357 | rank INTEGER NOT NULL DEFAULT 0 ,
|
---|
358 | UNIQUE ( bioentry_id , type_term_id , source_term_id , rank ) ) ;
|
---|
359 |
|
---|
360 | CREATE INDEX seqfeature_trm ON seqfeature ( type_term_id );
|
---|
361 | CREATE INDEX seqfeature_fsrc ON seqfeature ( source_term_id );
|
---|
362 | -- CONFIG: you may want to add this if you can't get the optimizer to
|
---|
363 | -- use the composite index for the initial keys
|
---|
364 | --CREATE INDEX seqfeature_bioentryid ON seqfeature(bioentry_id);
|
---|
365 |
|
---|
366 | -- seqfeatures can be arranged in containment hierarchies.
|
---|
367 | -- one can imagine storing other relationships between features,
|
---|
368 | -- in this case the term_id can be used to type the relationship
|
---|
369 | CREATE TABLE seqfeature_relationship (
|
---|
370 | seqfeature_relationship_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
371 | object_seqfeature_id INTEGER NOT NULL ,
|
---|
372 | subject_seqfeature_id INTEGER NOT NULL ,
|
---|
373 | term_id INTEGER NOT NULL ,
|
---|
374 | rank INTEGER ,
|
---|
375 | UNIQUE ( object_seqfeature_id , subject_seqfeature_id , term_id ) ) ;
|
---|
376 |
|
---|
377 | CREATE INDEX seqfeaturerel_trm ON seqfeature_relationship ( term_id );
|
---|
378 | CREATE INDEX seqfeaturerel_child ON seqfeature_relationship ( subject_seqfeature_id );
|
---|
379 | -- CONFIG: you may want to add this if you can't get the optimizer to
|
---|
380 | -- use the composite index for the initial keys
|
---|
381 | --CREATE INDEX seqfeaturerel_parent ON seqfeature_relationship(object_seqfeature_id);
|
---|
382 |
|
---|
383 | -- for deep (depth > 1) seqfeature relationship trees we need a transitive
|
---|
384 | -- closure table too
|
---|
385 | CREATE TABLE seqfeature_path (
|
---|
386 | object_seqfeature_id INTEGER NOT NULL ,
|
---|
387 | subject_seqfeature_id INTEGER NOT NULL ,
|
---|
388 | term_id INTEGER NOT NULL ,
|
---|
389 | distance INTEGER NOT NULL,
|
---|
390 | UNIQUE ( object_seqfeature_id , subject_seqfeature_id , term_id , distance ) ) ;
|
---|
391 |
|
---|
392 | CREATE INDEX seqfeaturepath_trm ON seqfeature_path ( term_id );
|
---|
393 | CREATE INDEX seqfeaturepath_child ON seqfeature_path ( subject_seqfeature_id );
|
---|
394 | -- CONFIG: you may want to add this if you can't get the optimizer to
|
---|
395 | -- use the composite index for the initial keys
|
---|
396 | --CREATE INDEX seqfeaturerel_parent ON seqfeature_path(object_seqfeature_id);
|
---|
397 |
|
---|
398 | -- tag/value associations - or ontology annotations
|
---|
399 | CREATE TABLE seqfeature_qualifier_value (
|
---|
400 | seqfeature_id INTEGER NOT NULL ,
|
---|
401 | term_id INTEGER NOT NULL ,
|
---|
402 | rank INTEGER NOT NULL DEFAULT 0 ,
|
---|
403 | value VARCHAR (1024) NOT NULL ,
|
---|
404 | PRIMARY KEY ( seqfeature_id , term_id , rank ) ) ;
|
---|
405 |
|
---|
406 | CREATE INDEX seqfeaturequal_trm ON seqfeature_qualifier_value ( term_id );
|
---|
407 |
|
---|
408 | -- DBXrefs for features. This is necessary for genome oriented viewpoints,
|
---|
409 | -- where you have a few have long sequences (contigs, or chromosomes) with many
|
---|
410 | -- features on them. In that case the features are the semantic scope for
|
---|
411 | -- their annotation bundles, not the bioentry they are attached to.
|
---|
412 | CREATE TABLE seqfeature_dbxref (
|
---|
413 | seqfeature_id INTEGER NOT NULL ,
|
---|
414 | dbxref_id INTEGER NOT NULL ,
|
---|
415 | rank INTEGER ,
|
---|
416 | PRIMARY KEY ( seqfeature_id , dbxref_id ) ) ;
|
---|
417 |
|
---|
418 | CREATE INDEX feadblink_dbx ON seqfeature_dbxref ( dbxref_id );
|
---|
419 |
|
---|
420 | -- basically we model everything as potentially having
|
---|
421 | -- any number of locations, ie, a split location. SimpleLocations
|
---|
422 | -- just have one location. We need to have a location id for the qualifier
|
---|
423 | -- associations of fuzzy locations.
|
---|
424 | --
|
---|
425 | -- please do not try to model complex assemblies with this thing. It wont
|
---|
426 | -- work. Check out the ensembl schema for this.
|
---|
427 | --
|
---|
428 | -- we allow nulls for start/end - this is useful for fuzzies as
|
---|
429 | -- standard range queries will not be included
|
---|
430 | --
|
---|
431 | -- for remote locations, the join to make is to DBXref
|
---|
432 | --
|
---|
433 | -- the FK to term is a possibility to store the type of the
|
---|
434 | -- location for determining in one hit whether it's a fuzzy or not
|
---|
435 | CREATE TABLE location (
|
---|
436 | location_id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY ,
|
---|
437 | seqfeature_id INTEGER NOT NULL ,
|
---|
438 | dbxref_id INTEGER ,
|
---|
439 | term_id INTEGER ,
|
---|
440 | start_pos INTEGER ,
|
---|
441 | end_pos INTEGER ,
|
---|
442 | strand INTEGER NOT NULL DEFAULT 0 ,
|
---|
443 | rank INTEGER NOT NULL DEFAULT 0 ,
|
---|
444 | UNIQUE ( seqfeature_id , rank ) ) ;
|
---|
445 |
|
---|
446 | CREATE INDEX seqfeatureloc_start ON location ( start_pos, end_pos );
|
---|
447 | CREATE INDEX seqfeatureloc_dbx ON location ( dbxref_id );
|
---|
448 | CREATE INDEX seqfeatureloc_trm ON location ( term_id );
|
---|
449 |
|
---|
450 | -- location qualifiers - mainly intended for fuzzies but anything
|
---|
451 | -- can go in here
|
---|
452 | -- some controlled vocab terms have slots;
|
---|
453 | -- fuzzies could be modeled as min_start(5), max_start(5)
|
---|
454 | --
|
---|
455 | -- there is no restriction on extending the fuzzy ontology
|
---|
456 | -- for your own nefarious aims, although the bio* apis will
|
---|
457 | -- most likely ignore these
|
---|
458 | CREATE TABLE location_qualifier_value (
|
---|
459 | location_id INTEGER NOT NULL ,
|
---|
460 | term_id INTEGER NOT NULL ,
|
---|
461 | value VARCHAR ( 255 ) NOT NULL ,
|
---|
462 | int_value INTEGER ,
|
---|
463 | PRIMARY KEY ( location_id , term_id ) ) ;
|
---|
464 |
|
---|
465 | CREATE INDEX locationqual_trm ON location_qualifier_value ( term_id );
|
---|
466 |
|
---|
467 | --
|
---|
468 | -- Create the foreign key constraints
|
---|
469 | --
|
---|
470 |
|
---|
471 | -- ontology term
|
---|
472 | ALTER TABLE term ADD CONSTRAINT FKont_term
|
---|
473 | FOREIGN KEY ( ontology_id ) REFERENCES ontology ( ontology_id )
|
---|
474 | ON DELETE CASCADE ;
|
---|
475 |
|
---|
476 | -- term synonyms
|
---|
477 | ALTER TABLE term_synonym ADD CONSTRAINT FKterm_syn
|
---|
478 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id )
|
---|
479 | ON DELETE CASCADE ;
|
---|
480 |
|
---|
481 | -- term_dbxref
|
---|
482 | ALTER TABLE term_dbxref ADD CONSTRAINT FKdbxref_trmdbxref
|
---|
483 | FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
|
---|
484 | ON DELETE CASCADE ;
|
---|
485 | ALTER TABLE term_dbxref ADD CONSTRAINT FKterm_trmdbxref
|
---|
486 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id )
|
---|
487 | ON DELETE CASCADE ;
|
---|
488 |
|
---|
489 | -- term_relationship
|
---|
490 | ALTER TABLE term_relationship ADD CONSTRAINT FKtrmsubject_trmrel
|
---|
491 | FOREIGN KEY ( subject_term_id ) REFERENCES term ( term_id )
|
---|
492 | ON DELETE CASCADE ;
|
---|
493 | ALTER TABLE term_relationship ADD CONSTRAINT FKtrmpredicate_trmrel
|
---|
494 | FOREIGN KEY ( predicate_term_id ) REFERENCES term ( term_id )
|
---|
495 | ON DELETE CASCADE ;
|
---|
496 | ALTER TABLE term_relationship ADD CONSTRAINT FKtrmobject_trmrel
|
---|
497 | FOREIGN KEY ( object_term_id ) REFERENCES term ( term_id )
|
---|
498 | ON DELETE CASCADE ;
|
---|
499 | ALTER TABLE term_relationship ADD CONSTRAINT FKontology_trmrel
|
---|
500 | FOREIGN KEY ( ontology_id ) REFERENCES ontology ( ontology_id )
|
---|
501 | ON DELETE CASCADE ;
|
---|
502 |
|
---|
503 | -- term_relationship_term
|
---|
504 | ALTER TABLE term_relationship_term ADD CONSTRAINT FKtrmrel_trmreltrm
|
---|
505 | FOREIGN KEY (term_relationship_id) REFERENCES term_relationship(term_relationship_id)
|
---|
506 | ON DELETE CASCADE ;
|
---|
507 | ALTER TABLE term_relationship_term ADD CONSTRAINT FKtrm_trmreltrm
|
---|
508 | FOREIGN KEY (term_id) REFERENCES term(term_id)
|
---|
509 | ON DELETE CASCADE ;
|
---|
510 |
|
---|
511 | -- term_path
|
---|
512 | ALTER TABLE term_path ADD CONSTRAINT FKtrmsubject_trmpath
|
---|
513 | FOREIGN KEY ( subject_term_id ) REFERENCES term ( term_id )
|
---|
514 | ON DELETE CASCADE ;
|
---|
515 | ALTER TABLE term_path ADD CONSTRAINT FKtrmpredicate_trmpath
|
---|
516 | FOREIGN KEY ( predicate_term_id ) REFERENCES term ( term_id )
|
---|
517 | ON DELETE CASCADE ;
|
---|
518 | ALTER TABLE term_path ADD CONSTRAINT FKtrmobject_trmpath
|
---|
519 | FOREIGN KEY ( object_term_id ) REFERENCES term ( term_id )
|
---|
520 | ON DELETE CASCADE ;
|
---|
521 | ALTER TABLE term_path ADD CONSTRAINT FKontology_trmpath
|
---|
522 | FOREIGN KEY ( ontology_id ) REFERENCES ontology ( ontology_id )
|
---|
523 | ON DELETE CASCADE ;
|
---|
524 |
|
---|
525 | -- taxon, taxon_name
|
---|
526 | -- unfortunately, we can't constrain parent_taxon_id as it is violated
|
---|
527 | -- occasionally by the downloads available from NCBI
|
---|
528 | -- ALTER TABLE taxon ADD CONSTRAINT FKtaxon_taxon
|
---|
529 | -- FOREIGN KEY ( parent_taxon_id ) REFERENCES taxon ( taxon_id )
|
---|
530 | -- DEFERRABLE;
|
---|
531 | ALTER TABLE taxon_name ADD CONSTRAINT FKtaxon_taxonname
|
---|
532 | FOREIGN KEY ( taxon_id ) REFERENCES taxon ( taxon_id )
|
---|
533 | ON DELETE CASCADE ;
|
---|
534 |
|
---|
535 | -- bioentry
|
---|
536 | ALTER TABLE bioentry ADD CONSTRAINT FKtaxon_bioentry
|
---|
537 | FOREIGN KEY ( taxon_id ) REFERENCES taxon ( taxon_id ) ;
|
---|
538 | ALTER TABLE bioentry ADD CONSTRAINT FKbiodatabase_bioentry
|
---|
539 | FOREIGN KEY ( biodatabase_id ) REFERENCES biodatabase ( biodatabase_id ) ;
|
---|
540 | -- bioentry_relationship
|
---|
541 | ALTER TABLE bioentry_relationship ADD CONSTRAINT FKterm_bioentryrel
|
---|
542 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
|
---|
543 | ALTER TABLE bioentry_relationship ADD CONSTRAINT FKparentent_bioentryrel
|
---|
544 | FOREIGN KEY ( object_bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
545 | ON DELETE CASCADE ;
|
---|
546 | ALTER TABLE bioentry_relationship ADD CONSTRAINT FKchildent_bioentryrel
|
---|
547 | FOREIGN KEY ( subject_bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
548 | ON DELETE CASCADE ;
|
---|
549 |
|
---|
550 | -- bioentry_path
|
---|
551 | ALTER TABLE bioentry_path ADD CONSTRAINT FKterm_bioentrypath
|
---|
552 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
|
---|
553 | ALTER TABLE bioentry_path ADD CONSTRAINT FKparentent_bioentrypath
|
---|
554 | FOREIGN KEY ( object_bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
555 | ON DELETE CASCADE ;
|
---|
556 | ALTER TABLE bioentry_path ADD CONSTRAINT FKchildent_bioentrypath
|
---|
557 | FOREIGN KEY ( subject_bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
558 | ON DELETE CASCADE ;
|
---|
559 |
|
---|
560 | -- biosequence
|
---|
561 | ALTER TABLE biosequence ADD CONSTRAINT FKbioentry_bioseq
|
---|
562 | FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
563 | ON DELETE CASCADE ;
|
---|
564 |
|
---|
565 | -- comment
|
---|
566 | ALTER TABLE comment ADD CONSTRAINT FKbioentry_comment
|
---|
567 | FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
568 | ON DELETE CASCADE ;
|
---|
569 |
|
---|
570 | -- bioentry_dbxref
|
---|
571 | ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKbioentry_dblink
|
---|
572 | FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
573 | ON DELETE CASCADE ;
|
---|
574 | ALTER TABLE bioentry_dbxref ADD CONSTRAINT FKdbxref_dblink
|
---|
575 | FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
|
---|
576 | ON DELETE CASCADE ;
|
---|
577 |
|
---|
578 | -- dbxref_qualifier_value
|
---|
579 | ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKtrm_dbxrefqual
|
---|
580 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
|
---|
581 | ALTER TABLE dbxref_qualifier_value ADD CONSTRAINT FKdbxref_dbxrefqual
|
---|
582 | FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
|
---|
583 | ON DELETE CASCADE ;
|
---|
584 |
|
---|
585 | -- bioentry_reference
|
---|
586 | ALTER TABLE bioentry_reference ADD CONSTRAINT FKbioentry_entryref
|
---|
587 | FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
588 | ON DELETE CASCADE ;
|
---|
589 | ALTER TABLE bioentry_reference ADD CONSTRAINT FKreference_entryref
|
---|
590 | FOREIGN KEY ( reference_id ) REFERENCES reference ( reference_id )
|
---|
591 | ON DELETE CASCADE ;
|
---|
592 |
|
---|
593 | -- reference
|
---|
594 | ALTER TABLE reference ADD CONSTRAINT FKdbxref_reference
|
---|
595 | FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id ) ;
|
---|
596 |
|
---|
597 | -- bioentry_qualifier_value
|
---|
598 | ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKbioentry_entqual
|
---|
599 | FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
600 | ON DELETE CASCADE ;
|
---|
601 | ALTER TABLE bioentry_qualifier_value ADD CONSTRAINT FKterm_entqual
|
---|
602 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
|
---|
603 |
|
---|
604 | -- seqfeature
|
---|
605 | ALTER TABLE seqfeature ADD CONSTRAINT FKterm_seqfeature
|
---|
606 | FOREIGN KEY ( type_term_id ) REFERENCES term ( term_id ) ;
|
---|
607 | ALTER TABLE seqfeature ADD CONSTRAINT FKsourceterm_seqfeature
|
---|
608 | FOREIGN KEY ( source_term_id ) REFERENCES term ( term_id ) ;
|
---|
609 | ALTER TABLE seqfeature ADD CONSTRAINT FKbioentry_seqfeature
|
---|
610 | FOREIGN KEY ( bioentry_id ) REFERENCES bioentry ( bioentry_id )
|
---|
611 | ON DELETE CASCADE ;
|
---|
612 |
|
---|
613 | -- seqfeature_relationship
|
---|
614 | ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKterm_seqfeatrel
|
---|
615 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
|
---|
616 | ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKparentfeat_seqfeatrel
|
---|
617 | FOREIGN KEY ( object_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
|
---|
618 | ON DELETE CASCADE ;
|
---|
619 | ALTER TABLE seqfeature_relationship ADD CONSTRAINT FKchildfeat_seqfeatrel
|
---|
620 | FOREIGN KEY ( subject_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
|
---|
621 | ON DELETE CASCADE ;
|
---|
622 |
|
---|
623 | -- seqfeature_path
|
---|
624 | ALTER TABLE seqfeature_path ADD CONSTRAINT FKterm_seqfeatpath
|
---|
625 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
|
---|
626 | ALTER TABLE seqfeature_path ADD CONSTRAINT FKparentfeat_seqfeatpath
|
---|
627 | FOREIGN KEY ( object_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
|
---|
628 | ON DELETE CASCADE ;
|
---|
629 | ALTER TABLE seqfeature_path ADD CONSTRAINT FKchildfeat_seqfeatpath
|
---|
630 | FOREIGN KEY ( subject_seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
|
---|
631 | ON DELETE CASCADE ;
|
---|
632 |
|
---|
633 | -- seqfeature_qualifier_value
|
---|
634 | ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKterm_featqual
|
---|
635 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
|
---|
636 | ALTER TABLE seqfeature_qualifier_value ADD CONSTRAINT FKseqfeature_featqual
|
---|
637 | FOREIGN KEY ( seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
|
---|
638 | ON DELETE CASCADE ;
|
---|
639 |
|
---|
640 | -- seqfeature_dbxref
|
---|
641 | ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKseqfeature_feadblink
|
---|
642 | FOREIGN KEY ( seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
|
---|
643 | ON DELETE CASCADE ;
|
---|
644 | ALTER TABLE seqfeature_dbxref ADD CONSTRAINT FKdbxref_feadblink
|
---|
645 | FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id )
|
---|
646 | ON DELETE CASCADE ;
|
---|
647 |
|
---|
648 | -- location
|
---|
649 | ALTER TABLE location ADD CONSTRAINT FKseqfeature_location
|
---|
650 | FOREIGN KEY ( seqfeature_id ) REFERENCES seqfeature ( seqfeature_id )
|
---|
651 | ON DELETE CASCADE ;
|
---|
652 | ALTER TABLE location ADD CONSTRAINT FKdbxref_location
|
---|
653 | FOREIGN KEY ( dbxref_id ) REFERENCES dbxref ( dbxref_id ) ;
|
---|
654 | ALTER TABLE location ADD CONSTRAINT FKterm_featloc
|
---|
655 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
|
---|
656 |
|
---|
657 | -- location_qualifier_value
|
---|
658 | ALTER TABLE location_qualifier_value ADD CONSTRAINT FKfeatloc_locqual
|
---|
659 | FOREIGN KEY ( location_id ) REFERENCES location ( location_id )
|
---|
660 | ON DELETE CASCADE ;
|
---|
661 | ALTER TABLE location_qualifier_value ADD CONSTRAINT FKterm_locqual
|
---|
662 | FOREIGN KEY ( term_id ) REFERENCES term ( term_id ) ;
|
---|
663 |
|
---|
664 |
|
---|