mirror of
https://github.com/Doctorado-ML/Stree_datasets.git
synced 2025-08-17 16:36:02 +00:00
41 lines
1.9 KiB
Plaintext
Executable File
41 lines
1.9 KiB
Plaintext
Executable File
The Domain Theory (for recognizing promoters):
|
|
|
|
% Promoters have a region where a protein (RNA polymerase) must make contact
|
|
% and the helical DNA sequence must have a valid conformation so that
|
|
% the two pieces of the contact region spatially align.
|
|
% Prolog notation is used.
|
|
promoter :- contact, conformation.
|
|
|
|
% There are two regions "upstream" from the beginning of the gene
|
|
% at which the RNA polymerase makes contact.
|
|
contact :- minus_35, minus_10.
|
|
|
|
% The following rules describe the compositions of possible contact regions.
|
|
minus_35 :- p-37=c, p-36=t, p-35=t, p-34=g, p-33=a, p-32=c.
|
|
minus_35 :- p-36=t, p-35=t, p-34=g, p-32=c, p-31=a.
|
|
minus_35 :- p-36=t, p-35=t, p-34=g, p-33=a, p-32=c, p-31=a.
|
|
minus_35 :- p-36=t, p-35=t, p-34=g, p-33=a, p-32=c.
|
|
|
|
minus_10 :- p-14=t, p-13=a, p-12=t, p-11=a, p-10=a, p-9=t.
|
|
minus_10 :- p-13=t, p-12=a, p-10=a, p-8=t.
|
|
minus_10 :- p-13=t, p-12=a, p-11=t, p-10=a, p-9=a, p-8=t.
|
|
minus_10 :- p-12=t, p-11=a, p-7=t.
|
|
|
|
% The following rules describe sequence characteristics that produce
|
|
% acceptable conformations.
|
|
conformation :- p-47=c, p-46=a, p-45=a, p-43=t, p-42=t, p-40=a, p-39=c,
|
|
p-22=g, p-18=t, p-16=c, p-8=g, p-7=c, p-6=g, p-5=c,
|
|
p-4=c, p-2=c, p-1=c.
|
|
conformation :- p-45=a, p-44=a, p-41=a.
|
|
conformation :- p-49=a, p-44=t, p-27=t, p-22=a, p-18=t, p-16=t, p-15=g,
|
|
p-1=a.
|
|
conformation :- p-45=a, p-41=a, p-28=t, p-27=t, p-23=t, p-21=a, p-20=a,
|
|
p-17=t, p-15=t, p-4=t.
|
|
|
|
% If exact matches are required, this domain theory matches NONE
|
|
% of the examples below. Also note that some of the MINUS_35 rules
|
|
% are subsumed by another MINUS_35 rule. This occurs because the
|
|
% biological evidence is inconclusive with respect to the correct specificity.
|
|
|
|
|