This function retrieves data from RegulonDB. Attributes from datasets can be selected and filtered.
get_dataset( regulondb, dataset = NULL, attributes = NULL, filters = NULL, and = TRUE, interval = NULL, partialmatch = NULL, output_format = "regulondb_result" )
regulondb | A regulondb object, as built by the function regulondb. |
---|---|
dataset | Dataset of interest. Use the function list_datasets for an overview of valid datasets. |
attributes | Vector of attributes to be retrieved. |
filters | List of filters to be used. The names should correspond to the attribute and the values correspond to the condition for selection. |
and | Logical argument. If TRUE (the default), all filters must be satisfied ("AND" operator). If FALSE, filters will be combined under the "OR" operator. |
interval | Names of the filters whose values will be treated as an interval, given as a lower and an upper bound. |
partialmatch | Names of the filters whose values are string patterns to be matched fully or partially in the query. |
output_format | A string specifying the output format. Possible options are "regulondb_result", "GRanges", "DNAStringSet" or "BStringSet". |
By default, a regulondb_result object. If requested through the parameter output_format, the result is instead returned as a GRanges object or a Biostrings object (DNAStringSet or BStringSet).
Carmina Barberena Jonas, Jesús Emiliano Sotelo Fonseca, José Alquicira Hernández, Joselyn Chávez
## Connect to the RegulonDB database if necessary if (!exists("regulondb_conn")) regulondb_conn <- connect_database() #> snapshotDate(): 2021-10-20 ## Build the regulon db object e_coli_regulondb <- regulondb( database_conn = regulondb_conn, organism = "E.coli", database_version = "1", genome_version = "1" ) ## Obtain all the information from the "GENE" dataset get_dataset(e_coli_regulondb, dataset = "GENE") #> regulondb_result with 4602 rows and 33 columns #> id name bnumber gi synonyms #> <character> <character> <character> <character> <character> #> 1 ECK120000001 alr b4053 NA ECK4045,EG10001,alr5.. #> 2 ECK120000002 modB b0764 NA ECK0753,EG10002,b076.. #> 3 ECK120000003 cysZ b2413 NA ECK2408,EG10003,b2413 #> 4 ECK120000004 dfp b3639 NA ECK3629,EG10004,b363.. #> 5 ECK120000005 dcuB b4123 NA ECK4116,EG10006,b412.. #> ... ... ... ... ... ... #> 4598 ECK125276531 yqfH b4753 NA ECK4610,G0-16747,b4753 #> 4599 ECK125276532 yliM b4736 NA ECK4593,G0-16731,b4736 #> 4600 ECK125276533 ynfS b4750 NA ECK4607,G0-16744,b4750 #> 4601 ECK125276534 ylcJ b4733 NA ECK4590,G0-16728,b4733 #> 4602 ECK125276535 sdhX b4764 NA ECK4621,G0-17009,b47.. #> posleft posright strand dna_sequence #> <integer> <integer> <character> <character> #> 1 4265782 4266861 forward ATGCAAGCGGCAACTGTTGT.. #> 2 795862 796551 forward ATGATACTGACCGATCCAGA.. #> 3 2531463 2532224 forward ATGGTTTCATCATTCACATC.. #> 4 3812731 3813951 forward ATGAGCCTGGCCGGTAAAAA.. #> 5 4347404 4348744 reverse ATGTTATTTACTATCCAACT.. #> ... ... ... ... ... #> 4598 3032939 3033010 reverse ATGATTAACCAAGTGAGCGT.. #> 4599 850332 850397 forward ATGGAAACGTTCTGTTACAT.. #> 4600 1642122 1642211 forward ATGAATAACCCCGTCTGTCT.. #> 4601 568695 568844 forward ATGAGCCTCGTTTTATGCTT.. #> 4602 765050 765150 forward ATATCTGTAATAAGAAATAG.. #> external_db_link evidence_reference product_id #> <character> <character> <character> #> 1 ASAP,http://asap.aha.. NA ECK120004477 #> 2 ASAP,http://asap.aha.. NA ECK120004478 #> 3 ASAP,http://asap.aha.. 
NA ECK120004479 #> 4 ASAP,http://asap.aha.. NA ECK120004480 #> 5 ASAP,http://asap.aha.. NA ECK120004481 #> ... ... ... ... #> 4598 NA NA ECK125276571 #> 4599 NA NA ECK125276556 #> 4600 NA NA ECK125276575 #> 4601 NA NA ECK125276577 #> 4602 NA \t\t\t30591570,30541135 ECK125276565 #> product_name product_synonym product_sequence #> <character> <character> <character> #> 1 alanine racemase 1 Alr,alanine racemase.. MQAATVVINRRALRHNLQRL.. #> 2 molybdate ABC transp.. ChlJ,ModB,TslJ MILTDPEWQAVLLSLKVSSL.. #> 3 sulfate:H<sup>+</sup.. CysZ MVSSFTSAPRSGFYYFAQGW.. #> 4 fused 4'-phosphopant.. CoaBC,Dfp MSLAGKKIVLGVSGGIAAYK.. #> 5 anaerobic C4-dicarbo.. DcuB,GenF MLFTIQLIIILICLFYGARK.. #> ... ... ... ... #> 4598 protein YqfH YqfH MINQVSVYRQPPVLSGCRQV.. #> 4599 protein YliM YliM METFCYMKWPVRHHKSRRVSH #> 4600 Qin prophage; protei.. YnfS MNNPVCLDDWLIGFKSLCCT.. #> 4601 protein YlcJ YlcJ MSLVLCFLLMSLFFMYSFVL.. #> 4602 small regulatory RNA.. RybD,SdhX AUAUCUGUAAUAAGAAAUAG.. #> molecular_weigth isoelectric_point celullar_location #> <numeric> <numeric> <character> #> 1 39.153 7.059 cytosol #> 2 24.939 10.716 inner membrane #> 3 29.305 9.809 inner membrane #> 4 43.438 7.585 cytosol #> 5 47.935 7.878 inner membrane #> ... ... ... ... #> 4598 2.617 10.452 NA #> 4599 2.716 10.877 NA #> 4600 3.193 4.113 inner membrane #> 4601 5.943 10.891 NA #> 4602 NA NA NA #> product_note product_type go_index_bp #> <character> <character> <character> #> 1 NA NA GO:0006522\tGO:000836.. #> 2 ModB is the predicte.. NA GO:0015689\tGO:0055085 #> 3 CysZ is a high affin.. NA GO:0000103\tGO:000827.. #> 4 The <i>dfp</i> (<i>c.. NA GO:0008152\tGO:001593.. #> 5 DcuB is a C4-dicarbo.. NA GO:0009061\tGO:001574.. #> ... ... ... ... #> 4598 YqfH was identified .. NA NA #> 4599 YliM was identified .. NA NA #> 4600 YnfS was identified .. NA NA #> 4601 YlcJ was identified .. NA NA #> 4602 The small regulatory.. 
small RNA GO:0040033 #> go_desc_bp go_index_cc go_desc_cc #> <character> <character> <character> #> 1 alanine metabolic pr.. GO:0005829 cytosol #> 2 molybdate ion transp.. GO:0005886\tGO:000588.. plasma membrane,inte.. #> 3 sulfate assimilation.. GO:0005886\tGO:000588.. plasma membrane,inte.. #> 4 metabolic process\tco.. GO:0005737\tGO:0005829 cytoplasm,cytosol #> 5 anaerobic respiratio.. GO:0005886\tGO:000588.. plasma membrane,inte.. #> ... ... ... ... #> 4598 NA NA NA #> 4599 NA NA NA #> 4600 NA GO:0005886\tGO:001602.. plasma membrane,memb.. #> 4601 NA NA NA #> 4602 negative regulation .. NA NA #> go_index_mf go_desc_mf product_external_db_links #> <character> <character> <character> #> 1 GO:0003824\tGO:000878.. catalytic activity\ta.. ECOCYC\thttp://biocyc.. #> 2 GO:0015098 molybdate ion transm.. ECOCYC\thttp://biocyc.. #> 3 GO:0009675\tGO:0015116 high-affinity sulfat.. ECOCYC\thttp://biocyc.. #> 4 GO:0003824\tGO:000463.. catalytic activity\tp.. DIP\thttp://dip.doe-m.. #> 5 GO:0005469\tGO:0015556 succinate:fumarate a.. ECOCYC\thttp://biocyc.. #> ... ... ... ... #> 4598 NA NA ECOCYC\thttp://biocyc.. #> 4599 NA NA ECOCYC\thttp://biocyc.. #> 4600 NA NA ECOCYC\thttp://biocyc.. #> 4601 NA NA ECOCYC\thttp://biocyc.. #> 4602 GO:0005515\tGO:0048027 protein binding\tmRNA.. ECOCYC\thttp://biocyc.. #> product_ev_ref operon_id operon_name #> <character> <character> <character> #> 1 NA ECK120029421 alr #> 2 NA ECK120014822 modABC #> 3 \t\t\t6341507,29792261 ECK120029422 cysZ #> 4 NA ECK120030583 dfp #> 5 \t\t\t20860483,1512189 ECK120014710 dcuB-fumB #> ... ... ... ... #> 4598 NA NA NA #> 4599 NA ECK125285808 yliM-ompX #> 4600 NA NA NA #> 4601 NA NA NA #> 4602 IMP\t\tInferred from m.. ECK125285815 sdhCDAB-sucABCD-sdhX #> tu_promoter sigma_factor gene_tf #> <character> <character> <character> #> 1 ECK120029337\t\tECK120.. NA NA #> 2 ECK120009489\tmodABC\t.. Sigma28,Sigma70 ECK120011235\tCRP ECK.. #> 3 NA NA NA #> 4 ECK120030182\tdfp\tECK.. NA NA #> 5 ECK120009650\tdcuB-fu.. 
Sigma70 ECK120011235\tCRP ECK.. #> ... ... ... ... #> 4598 NA NA NA #> 4599 ECK120034934\tyliM-om.. Sigma70 NA #> 4600 NA NA NA #> 4601 NA NA NA #> 4602 ECK120009705\tsdhCDAB.. Sigma70 ECK120011345\tArcA EC.. ## Get the attributes posright and name from the "GENE" dataset get_dataset(e_coli_regulondb, dataset = "GENE", attributes = c("posright", "name") ) #> regulondb_result with 4602 rows and 2 columns #> posright name #> <integer> <character> #> 1 4266861 alr #> 2 796551 modB #> 3 2532224 cysZ #> 4 3813951 dfp #> 5 4348744 dcuB #> ... ... ... #> 4598 3033010 yqfH #> 4599 850397 yliM #> 4600 1642211 ynfS #> 4601 568844 ylcJ #> 4602 765150 sdhX ## From "GENE" dataset, get the gene name, strand, posright, product name ## and id of all genes regulated with name like "ara", strand as "forward" ## with a position right between 2000 and 40000 get_dataset( e_coli_regulondb, dataset = "GENE", attributes = c("name", "strand", "posright", "product_name", "id"), filters = list( name = c("ara"), strand = c("forward"), posright = c("2000", "40000") ), and = TRUE, partialmatch = "name", interval = "posright" ) #> regulondb_result with 1 row and 5 columns #> name strand posright product_name id #> <character> <character> <integer> <character> <character> #> 1 carA forward 30799 carbamoyl phosphate .. ECK120000130