This function retrieves data from RegulonDB. Attributes from datasets can be selected and filtered.
get_dataset(
regulondb,
dataset = NULL,
attributes = NULL,
filters = NULL,
and = TRUE,
interval = NULL,
partialmatch = NULL,
output_format = "regulondb_result"
)
A regulondb() object.
Dataset of interest. Use the function list_datasets for an overview of valid datasets.
Vector of attributes to be retrieved.
List of filters to be used. The names should correspond to the attribute and the values correspond to the condition for selection.
Logical argument. If TRUE (the default), all filters must be satisfied ("AND" operator). If FALSE, filters will be considered under the "OR" operator.
Names of the filters whose values will be treated as an interval (lower and upper bound).
Names of the filters whose values are string patterns to be matched fully or partially in the query.
A string specifying the output format. Possible options are "regulondb_result", "GRanges", "DNAStringSet" or "BStringSet".
By default, a regulondb_result object. If specified in the parameter output_format, it can also return either a GRanges object or a Biostrings object (DNAStringSet or BStringSet).
## Connect to the RegulonDB database if necessary
if (!exists("regulondb_conn")) regulondb_conn <- connect_database()
## Build the regulon db object
e_coli_regulondb <-
regulondb(
database_conn = regulondb_conn,
organism = "E.coli",
database_version = "1",
genome_version = "1"
)
## Obtain all the information from the "GENE" dataset
get_dataset(e_coli_regulondb, dataset = "GENE")
#> regulondb_result with 4602 rows and 33 columns
#> id name bnumber gi synonyms
#> <character> <character> <character> <character> <character>
#> 1 ECK120000001 alr b4053 NA ECK4045,EG10001,alr5..
#> 2 ECK120000002 modB b0764 NA ECK0753,EG10002,b076..
#> 3 ECK120000003 cysZ b2413 NA ECK2408,EG10003,b2413
#> 4 ECK120000004 dfp b3639 NA ECK3629,EG10004,b363..
#> 5 ECK120000005 dcuB b4123 NA ECK4116,EG10006,b412..
#> ... ... ... ... ... ...
#> 4598 ECK125276531 yqfH b4753 NA ECK4610,G0-16747,b4753
#> 4599 ECK125276532 yliM b4736 NA ECK4593,G0-16731,b4736
#> 4600 ECK125276533 ynfS b4750 NA ECK4607,G0-16744,b4750
#> 4601 ECK125276534 ylcJ b4733 NA ECK4590,G0-16728,b4733
#> 4602 ECK125276535 sdhX b4764 NA ECK4621,G0-17009,b47..
#> posleft posright strand dna_sequence
#> <integer> <integer> <character> <character>
#> 1 4265782 4266861 forward ATGCAAGCGGCAACTGTTGT..
#> 2 795862 796551 forward ATGATACTGACCGATCCAGA..
#> 3 2531463 2532224 forward ATGGTTTCATCATTCACATC..
#> 4 3812731 3813951 forward ATGAGCCTGGCCGGTAAAAA..
#> 5 4347404 4348744 reverse ATGTTATTTACTATCCAACT..
#> ... ... ... ... ...
#> 4598 3032939 3033010 reverse ATGATTAACCAAGTGAGCGT..
#> 4599 850332 850397 forward ATGGAAACGTTCTGTTACAT..
#> 4600 1642122 1642211 forward ATGAATAACCCCGTCTGTCT..
#> 4601 568695 568844 forward ATGAGCCTCGTTTTATGCTT..
#> 4602 765050 765150 forward ATATCTGTAATAAGAAATAG..
#> external_db_link evidence_reference product_id
#> <character> <character> <character>
#> 1 ASAP,http://asap.aha.. NA ECK120004477
#> 2 ASAP,http://asap.aha.. NA ECK120004478
#> 3 ASAP,http://asap.aha.. NA ECK120004479
#> 4 ASAP,http://asap.aha.. NA ECK120004480
#> 5 ASAP,http://asap.aha.. NA ECK120004481
#> ... ... ... ...
#> 4598 NA NA ECK125276571
#> 4599 NA NA ECK125276556
#> 4600 NA NA ECK125276575
#> 4601 NA NA ECK125276577
#> 4602 NA \t\t\t30591570,30541135 ECK125276565
#> product_name product_synonym product_sequence
#> <character> <character> <character>
#> 1 alanine racemase 1 Alr,alanine racemase.. MQAATVVINRRALRHNLQRL..
#> 2 molybdate ABC transp.. ChlJ,ModB,TslJ MILTDPEWQAVLLSLKVSSL..
#> 3 sulfate:H<sup>+</sup.. CysZ MVSSFTSAPRSGFYYFAQGW..
#> 4 fused 4'-phosphopant.. CoaBC,Dfp MSLAGKKIVLGVSGGIAAYK..
#> 5 anaerobic C4-dicarbo.. DcuB,GenF MLFTIQLIIILICLFYGARK..
#> ... ... ... ...
#> 4598 protein YqfH YqfH MINQVSVYRQPPVLSGCRQV..
#> 4599 protein YliM YliM METFCYMKWPVRHHKSRRVSH
#> 4600 Qin prophage; protei.. YnfS MNNPVCLDDWLIGFKSLCCT..
#> 4601 protein YlcJ YlcJ MSLVLCFLLMSLFFMYSFVL..
#> 4602 small regulatory RNA.. RybD,SdhX AUAUCUGUAAUAAGAAAUAG..
#> molecular_weigth isoelectric_point celullar_location
#> <numeric> <numeric> <character>
#> 1 39.153 7.059 cytosol
#> 2 24.939 10.716 inner membrane
#> 3 29.305 9.809 inner membrane
#> 4 43.438 7.585 cytosol
#> 5 47.935 7.878 inner membrane
#> ... ... ... ...
#> 4598 2.617 10.452 NA
#> 4599 2.716 10.877 NA
#> 4600 3.193 4.113 inner membrane
#> 4601 5.943 10.891 NA
#> 4602 NA NA NA
#> product_note product_type go_index_bp
#> <character> <character> <character>
#> 1 NA NA GO:0006522\tGO:000836..
#> 2 ModB is the predicte.. NA GO:0015689\tGO:0055085
#> 3 CysZ is a high affin.. NA GO:0000103\tGO:000827..
#> 4 The <i>dfp</i> (<i>c.. NA GO:0008152\tGO:001593..
#> 5 DcuB is a C4-dicarbo.. NA GO:0009061\tGO:001574..
#> ... ... ... ...
#> 4598 YqfH was identified .. NA NA
#> 4599 YliM was identified .. NA NA
#> 4600 YnfS was identified .. NA NA
#> 4601 YlcJ was identified .. NA NA
#> 4602 The small regulatory.. small RNA GO:0040033
#> go_desc_bp go_index_cc go_desc_cc
#> <character> <character> <character>
#> 1 alanine metabolic pr.. GO:0005829 cytosol
#> 2 molybdate ion transp.. GO:0005886\tGO:000588.. plasma membrane,inte..
#> 3 sulfate assimilation.. GO:0005886\tGO:000588.. plasma membrane,inte..
#> 4 metabolic process\tco.. GO:0005737\tGO:0005829 cytoplasm,cytosol
#> 5 anaerobic respiratio.. GO:0005886\tGO:000588.. plasma membrane,inte..
#> ... ... ... ...
#> 4598 NA NA NA
#> 4599 NA NA NA
#> 4600 NA GO:0005886\tGO:001602.. plasma membrane,memb..
#> 4601 NA NA NA
#> 4602 negative regulation .. NA NA
#> go_index_mf go_desc_mf product_external_db_links
#> <character> <character> <character>
#> 1 GO:0003824\tGO:000878.. catalytic activity\ta.. ECOCYC\thttp://biocyc..
#> 2 GO:0015098 molybdate ion transm.. ECOCYC\thttp://biocyc..
#> 3 GO:0009675\tGO:0015116 high-affinity sulfat.. ECOCYC\thttp://biocyc..
#> 4 GO:0003824\tGO:000463.. catalytic activity\tp.. DIP\thttp://dip.doe-m..
#> 5 GO:0005469\tGO:0015556 succinate:fumarate a.. ECOCYC\thttp://biocyc..
#> ... ... ... ...
#> 4598 NA NA ECOCYC\thttp://biocyc..
#> 4599 NA NA ECOCYC\thttp://biocyc..
#> 4600 NA NA ECOCYC\thttp://biocyc..
#> 4601 NA NA ECOCYC\thttp://biocyc..
#> 4602 GO:0005515\tGO:0048027 protein binding\tmRNA.. ECOCYC\thttp://biocyc..
#> product_ev_ref operon_id operon_name
#> <character> <character> <character>
#> 1 NA ECK120029421 alr
#> 2 NA ECK120014822 modABC
#> 3 \t\t\t6341507,29792261 ECK120029422 cysZ
#> 4 NA ECK120030583 dfp
#> 5 \t\t\t20860483,1512189 ECK120014710 dcuB-fumB
#> ... ... ... ...
#> 4598 NA NA NA
#> 4599 NA ECK125285808 yliM-ompX
#> 4600 NA NA NA
#> 4601 NA NA NA
#> 4602 IMP\t\tInferred from m.. ECK125285815 sdhCDAB-sucABCD-sdhX
#> tu_promoter sigma_factor gene_tf
#> <character> <character> <character>
#> 1 ECK120029337\t\tECK120.. NA NA
#> 2 ECK120009489\tmodABC\t.. Sigma28,Sigma70 ECK120011235\tCRP ECK..
#> 3 NA NA NA
#> 4 ECK120030182\tdfp\tECK.. NA NA
#> 5 ECK120009650\tdcuB-fu.. Sigma70 ECK120011235\tCRP ECK..
#> ... ... ... ...
#> 4598 NA NA NA
#> 4599 ECK120034934\tyliM-om.. Sigma70 NA
#> 4600 NA NA NA
#> 4601 NA NA NA
#> 4602 ECK120009705\tsdhCDAB.. Sigma70 ECK120011345\tArcA EC..
## Get the attributes posright and name from the "GENE" dataset
get_dataset(e_coli_regulondb,
dataset = "GENE",
attributes = c("posright", "name")
)
#> regulondb_result with 4602 rows and 2 columns
#> posright name
#> <integer> <character>
#> 1 4266861 alr
#> 2 796551 modB
#> 3 2532224 cysZ
#> 4 3813951 dfp
#> 5 4348744 dcuB
#> ... ... ...
#> 4598 3033010 yqfH
#> 4599 850397 yliM
#> 4600 1642211 ynfS
#> 4601 568844 ylcJ
#> 4602 765150 sdhX
## From the "GENE" dataset, get the gene name, strand, posright, product name
## and id of all genes with a name like "ara", located on the "forward" strand,
## and with a right position (posright) between 2000 and 40000
get_dataset(
e_coli_regulondb,
dataset = "GENE",
attributes = c("name", "strand", "posright", "product_name", "id"),
filters = list(
name = c("ara"),
strand = c("forward"),
posright = c("2000", "40000")
),
and = TRUE,
partialmatch = "name",
interval = "posright"
)
#> regulondb_result with 1 row and 5 columns
#> name strand posright product_name id
#> <character> <character> <integer> <character> <character>
#> 1 carA forward 30799 carbamoyl phosphate .. ECK120000130