R/get_dataset.R
convert_to_granges.Rd
This function converts, when possible, a regulondb_result object into a GRanges object.
convert_to_granges(regulondb_result)
A GRanges object.
## Reuse an existing RegulonDB connection; open a new one only when
## `regulondb_conn` is not already defined
if (!exists("regulondb_conn")) {
  regulondb_conn <- connect_database()
}
## Wrap the connection in a regulondb object for E. coli
e_coli_regulondb <- regulondb(
  database_conn = regulondb_conn,
  organism = "E.coli",
  database_version = "1",
  genome_version = "1"
)
## Retrieve the complete "GENE" dataset and convert the result to GRanges
convert_to_granges(
  get_dataset(e_coli_regulondb, dataset = "GENE")
)
#> Warning: Dropped 19 entries where genomic coordinates were NAs
#> GRanges object with 4583 ranges and 30 metadata columns:
#> seqnames ranges strand | id name bnumber
#> <Rle> <IRanges> <Rle> | <character> <character> <character>
#> [1] E.coli 4265782-4266861 + | ECK120000001 alr b4053
#> [2] E.coli 795862-796551 + | ECK120000002 modB b0764
#> [3] E.coli 2531463-2532224 + | ECK120000003 cysZ b2413
#> [4] E.coli 3812731-3813951 + | ECK120000004 dfp b3639
#> [5] E.coli 4347404-4348744 - | ECK120000005 dcuB b4123
#> ... ... ... ... . ... ... ...
#> [4579] E.coli 3032939-3033010 - | ECK125276531 yqfH b4753
#> [4580] E.coli 850332-850397 + | ECK125276532 yliM b4736
#> [4581] E.coli 1642122-1642211 + | ECK125276533 ynfS b4750
#> [4582] E.coli 568695-568844 + | ECK125276534 ylcJ b4733
#> [4583] E.coli 765050-765150 + | ECK125276535 sdhX b4764
#> gi synonyms dna_sequence
#> <character> <character> <character>
#> [1] <NA> ECK4045,EG10001,alr5.. ATGCAAGCGGCAACTGTTGT..
#> [2] <NA> ECK0753,EG10002,b076.. ATGATACTGACCGATCCAGA..
#> [3] <NA> ECK2408,EG10003,b2413 ATGGTTTCATCATTCACATC..
#> [4] <NA> ECK3629,EG10004,b363.. ATGAGCCTGGCCGGTAAAAA..
#> [5] <NA> ECK4116,EG10006,b412.. ATGTTATTTACTATCCAACT..
#> ... ... ... ...
#> [4579] <NA> ECK4610,G0-16747,b4753 ATGATTAACCAAGTGAGCGT..
#> [4580] <NA> ECK4593,G0-16731,b4736 ATGGAAACGTTCTGTTACAT..
#> [4581] <NA> ECK4607,G0-16744,b4750 ATGAATAACCCCGTCTGTCT..
#> [4582] <NA> ECK4590,G0-16728,b4733 ATGAGCCTCGTTTTATGCTT..
#> [4583] <NA> ECK4621,G0-17009,b47.. ATATCTGTAATAAGAAATAG..
#> external_db_link evidence_reference product_id
#> <character> <character> <character>
#> [1] ASAP,http://asap.aha.. <NA> ECK120004477
#> [2] ASAP,http://asap.aha.. <NA> ECK120004478
#> [3] ASAP,http://asap.aha.. <NA> ECK120004479
#> [4] ASAP,http://asap.aha.. <NA> ECK120004480
#> [5] ASAP,http://asap.aha.. <NA> ECK120004481
#> ... ... ... ...
#> [4579] <NA> <NA> ECK125276571
#> [4580] <NA> <NA> ECK125276556
#> [4581] <NA> <NA> ECK125276575
#> [4582] <NA> <NA> ECK125276577
#> [4583] <NA> \t\t\t30591570,30541135 ECK125276565
#> product_name product_synonym product_sequence
#> <character> <character> <character>
#> [1] alanine racemase 1 Alr,alanine racemase.. MQAATVVINRRALRHNLQRL..
#> [2] molybdate ABC transp.. ChlJ,ModB,TslJ MILTDPEWQAVLLSLKVSSL..
#> [3] sulfate:H<sup>+</sup.. CysZ MVSSFTSAPRSGFYYFAQGW..
#> [4] fused 4'-phosphopant.. CoaBC,Dfp MSLAGKKIVLGVSGGIAAYK..
#> [5] anaerobic C4-dicarbo.. DcuB,GenF MLFTIQLIIILICLFYGARK..
#> ... ... ... ...
#> [4579] protein YqfH YqfH MINQVSVYRQPPVLSGCRQV..
#> [4580] protein YliM YliM METFCYMKWPVRHHKSRRVSH
#> [4581] Qin prophage; protei.. YnfS MNNPVCLDDWLIGFKSLCCT..
#> [4582] protein YlcJ YlcJ MSLVLCFLLMSLFFMYSFVL..
#> [4583] small regulatory RNA.. RybD,SdhX AUAUCUGUAAUAAGAAAUAG..
#> molecular_weigth isoelectric_point celullar_location
#> <numeric> <numeric> <character>
#> [1] 39.153 7.059 cytosol
#> [2] 24.939 10.716 inner membrane
#> [3] 29.305 9.809 inner membrane
#> [4] 43.438 7.585 cytosol
#> [5] 47.935 7.878 inner membrane
#> ... ... ... ...
#> [4579] 2.617 10.452 <NA>
#> [4580] 2.716 10.877 <NA>
#> [4581] 3.193 4.113 inner membrane
#> [4582] 5.943 10.891 <NA>
#> [4583] NA NA <NA>
#> product_note product_type go_index_bp
#> <character> <character> <character>
#> [1] <NA> <NA> GO:0006522\tGO:000836..
#> [2] ModB is the predicte.. <NA> GO:0015689\tGO:0055085
#> [3] CysZ is a high affin.. <NA> GO:0000103\tGO:000827..
#> [4] The <i>dfp</i> (<i>c.. <NA> GO:0008152\tGO:001593..
#> [5] DcuB is a C4-dicarbo.. <NA> GO:0009061\tGO:001574..
#> ... ... ... ...
#> [4579] YqfH was identified .. <NA> <NA>
#> [4580] YliM was identified .. <NA> <NA>
#> [4581] YnfS was identified .. <NA> <NA>
#> [4582] YlcJ was identified .. <NA> <NA>
#> [4583] The small regulatory.. small RNA GO:0040033
#> go_desc_bp go_index_cc go_desc_cc
#> <character> <character> <character>
#> [1] alanine metabolic pr.. GO:0005829 cytosol
#> [2] molybdate ion transp.. GO:0005886\tGO:000588.. plasma membrane,inte..
#> [3] sulfate assimilation.. GO:0005886\tGO:000588.. plasma membrane,inte..
#> [4] metabolic process\tco.. GO:0005737\tGO:0005829 cytoplasm,cytosol
#> [5] anaerobic respiratio.. GO:0005886\tGO:000588.. plasma membrane,inte..
#> ... ... ... ...
#> [4579] <NA> <NA> <NA>
#> [4580] <NA> <NA> <NA>
#> [4581] <NA> GO:0005886\tGO:001602.. plasma membrane,memb..
#> [4582] <NA> <NA> <NA>
#> [4583] negative regulation .. <NA> <NA>
#> go_index_mf go_desc_mf
#> <character> <character>
#> [1] GO:0003824\tGO:000878.. catalytic activity\ta..
#> [2] GO:0015098 molybdate ion transm..
#> [3] GO:0009675\tGO:0015116 high-affinity sulfat..
#> [4] GO:0003824\tGO:000463.. catalytic activity\tp..
#> [5] GO:0005469\tGO:0015556 succinate:fumarate a..
#> ... ... ...
#> [4579] <NA> <NA>
#> [4580] <NA> <NA>
#> [4581] <NA> <NA>
#> [4582] <NA> <NA>
#> [4583] GO:0005515\tGO:0048027 protein binding\tmRNA..
#> product_external_db_links product_ev_ref operon_id
#> <character> <character> <character>
#> [1] ECOCYC\thttp://biocyc.. <NA> ECK120029421
#> [2] ECOCYC\thttp://biocyc.. <NA> ECK120014822
#> [3] ECOCYC\thttp://biocyc.. \t\t\t6341507,29792261 ECK120029422
#> [4] DIP\thttp://dip.doe-m.. <NA> ECK120030583
#> [5] ECOCYC\thttp://biocyc.. \t\t\t20860483,1512189 ECK120014710
#> ... ... ... ...
#> [4579] ECOCYC\thttp://biocyc.. <NA> <NA>
#> [4580] ECOCYC\thttp://biocyc.. <NA> ECK125285808
#> [4581] ECOCYC\thttp://biocyc.. <NA> <NA>
#> [4582] ECOCYC\thttp://biocyc.. <NA> <NA>
#> [4583] ECOCYC\thttp://biocyc.. IMP\t\tInferred from m.. ECK125285815
#> operon_name tu_promoter sigma_factor
#> <character> <character> <character>
#> [1] alr ECK120029337\t\tECK120.. <NA>
#> [2] modABC ECK120009489\tmodABC\t.. Sigma28,Sigma70
#> [3] cysZ <NA> <NA>
#> [4] dfp ECK120030182\tdfp\tECK.. <NA>
#> [5] dcuB-fumB ECK120009650\tdcuB-fu.. Sigma70
#> ... ... ... ...
#> [4579] <NA> <NA> <NA>
#> [4580] yliM-ompX ECK120034934\tyliM-om.. Sigma70
#> [4581] <NA> <NA> <NA>
#> [4582] <NA> <NA> <NA>
#> [4583] sdhCDAB-sucABCD-sdhX ECK120009705\tsdhCDAB.. Sigma70
#> gene_tf
#> <character>
#> [1] <NA>
#> [2] ECK120011235\tCRP ECK..
#> [3] <NA>
#> [4] <NA>
#> [5] ECK120011235\tCRP ECK..
#> ... ...
#> [4579] <NA>
#> [4580] <NA>
#> [4581] <NA>
#> [4582] <NA>
#> [4583] ECK120011345\tArcA EC..
#> -------
#> seqinfo: 1 sequence from an unspecified genome; no seqlengths