Read functions for BED and related formats. Filenames can be local files or URLs. The read functions load data into tbls with consistent chrom, start and end colnames.

read_bed(filename, n_fields = 3, col_types = bed12_coltypes, sort = TRUE,
  ...)

read_bed12(filename, ...)

read_bedgraph(filename, ...)

read_narrowpeak(filename, ...)

read_broadpeak(filename, ...)

Arguments

filename

file or URL

n_fields

number of fields in the BED file

col_types

column type spec for readr::read_tsv()

sort

sort the tbl by chrom and start

...

options to pass to readr::read_tsv()

Value

tbl_interval()

Details

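BED format (read_bed): https://genome.ucsc.edu/FAQ/FAQformat.html#format1

BED12 format (read_bed12): https://genome.ucsc.edu/FAQ/FAQformat.html#format1

bedGraph format (read_bedgraph): https://genome.ucsc.edu/goldenPath/help/bedgraph.html

narrowPeak format (read_narrowpeak): https://genome.ucsc.edu/FAQ/FAQformat.html#format12

broadPeak format (read_broadpeak): https://genome.ucsc.edu/FAQ/FAQformat.html#format13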

See also

Other read functions: read_genome, read_vcf

Examples

# read_bed assumes 3 field BED format.
read_bed(valr_example('3fields.bed.gz'))
#> # A tibble: 10 x 3
#>    chrom  start    end
#>    <chr>  <int>  <int>
#>  1 chr1   11873  14409
#>  2 chr1   14361  19759
#>  3 chr1   14406  29370
#>  4 chr1   34610  36081
#>  5 chr1   69090  70008
#>  6 chr1  134772 140566
#>  7 chr1  321083 321115
#>  8 chr1  321145 321207
#>  9 chr1  322036 326938
#> 10 chr1  327545 328439
read_bed(valr_example('6fields.bed.gz'), n_fields = 6)
#> # A tibble: 10 x 6
#>    chrom  start    end name         score strand
#>    <chr>  <int>  <int> <chr>        <chr> <chr>
#>  1 chr1   11873  14409 DDX11L1      3     +
#>  2 chr1   14361  19759 WASH7P       10    -
#>  3 chr1   14406  29370 WASH7P       7     -
#>  4 chr1   34610  36081 FAM138F      3     -
#>  5 chr1   69090  70008 OR4F5        1     +
#>  6 chr1  134772 140566 LOC729737    3     -
#>  7 chr1  321083 321115 DQ597235     1     +
#>  8 chr1  321145 321207 DQ599768     1     +
#>  9 chr1  322036 326938 LOC100133331 3     +
#> 10 chr1  327545 328439 LOC388312    1     +
# result is sorted by chrom and start unless `sort = FALSE`
read_bed(valr_example('3fields.bed.gz'), sort = FALSE)
#> # A tibble: 10 x 3
#>    chrom  start    end
#>    <chr>  <int>  <int>
#>  1 chr1   11873  14409
#>  2 chr1   14361  19759
#>  3 chr1   14406  29370
#>  4 chr1   34610  36081
#>  5 chr1   69090  70008
#>  6 chr1  134772 140566
#>  7 chr1  321083 321115
#>  8 chr1  321145 321207
#>  9 chr1  322036 326938
#> 10 chr1  327545 328439
read_bed12(valr_example('mm9.refGene.bed.gz'))
#> # A tibble: 100 x 12
#>    chrom  start    end name  score strand cds_start cds_end item_rgb exon_count
#>    <chr>  <int>  <int> <chr> <chr> <chr>      <int>   <int> <chr>         <int>
#>  1 chr1  3.20e6 3.66e6 NM_0… 0     -        3206102 3661429 0                 3
#>  2 chr1  4.28e6 4.40e6 NM_0… 0     -        4283061 4399268 0                 4
#>  3 chr1  4.85e6 4.89e6 NM_0… 0     +        4847994 4886445 0                10
#>  4 chr1  4.85e6 4.89e6 NM_0… 0     +        4847994 4886445 0                10
#>  5 chr1  4.85e6 4.89e6 NM_0… 0     +        4848488 4886445 0                10
#>  6 chr1  4.90e6 5.01e6 NM_0… 0     -        4900554 4914046 0                 5
#>  7 chr1  4.90e6 5.01e6 NM_0… 0     -        4900554 5009460 0                 5
#>  8 chr1  4.90e6 5.06e6 NM_0… 0     -        4900554 5060258 0                 6
#>  9 chr1  5.07e6 5.15e6 NM_0… 0     +        5074531 5152246 0                13
#> 10 chr1  5.07e6 5.15e6 NM_1… 0     +        5074531 5152246 0                14
#> # ... with 90 more rows, and 2 more variables: exon_sizes <chr>,
#> #   exon_starts <chr>
read_bedgraph(valr_example('test.bg.gz'))
#> # A tibble: 4 x 4
#>   chrom    start      end value
#>   <chr>    <int>    <int> <dbl>
#> 1 chr19 49302000 49302300 -1
#> 2 chr19 49302300 49302600 -0.75
#> 3 chr19 49302600 49302900 -0.5
#> 4 chr19 49302900 49303200 -0.25
read_narrowpeak(valr_example('sample.narrowPeak.gz'))
#> # A tibble: 570 x 10
#>    chrom    start      end name  score strand signal pvalue qvalue  peak
#>    <chr>    <int>    <int> <chr> <int> <chr>   <dbl>  <dbl>  <dbl> <int>
#>  1 chr22 17372940 17373090 .         0 .           4   4.63     -1    -1
#>  2 chr22 17392200 17392350 .         0 .           5   4.67     -1    -1
#>  3 chr22 17398400 17398550 .         0 .          10  11.6      -1    -1
#>  4 chr22 17539180 17539330 .         0 .          21  30.9      -1    -1
#>  5 chr22 17652440 17652590 .         0 .           6   5.35     -1    -1
#>  6 chr22 17652780 17652930 .         0 .          12  12.5      -1    -1
#>  7 chr22 17980800 17980950 .         0 .          12  12.6      -1    -1
#>  8 chr22 18038260 18038410 .         0 .          29  36.0      -1    -1
#>  9 chr22 18225280 18225430 .         0 .          21  25.0      -1    -1
#> 10 chr22 18268020 18268170 .         0 .          14  13.0      -1    -1
#> # ... with 560 more rows
read_broadpeak(valr_example('sample.broadPeak.gz'))
#> # A tibble: 1,181 x 9
#>    chrom    start      end name  score strand signal pvalue qvalue
#>    <chr>    <int>    <int> <chr> <int> <chr>   <dbl>  <dbl>  <dbl>
#>  1 chr22 16847903 16848440 .       503 .       10.5     2.5     -1
#>  2 chr22 16849452 16851326 .       483 .        9.81   15.7     -1
#>  3 chr22 16849955 16850086 .      1000 .       32.4     4.2     -1
#>  4 chr22 16850694 16850924 .       831 .       22.5     4.9     -1
#>  5 chr22 16852964 16853782 .       499 .       10.4     6.2     -1
#>  6 chr22 16855065 16855803 .       477 .        9.58    3.9     -1
#>  7 chr22 16855944 16856974 .       491 .       10.1     8.5     -1
#>  8 chr22 16857425 16857958 .       505 .       10.6     2.5     -1
#>  9 chr22 16858284 16858824 .       549 .       12.2     4.5     -1
#> 10 chr22 16859972 16862024 .       404 .        6.89    9.3     -1
#> # ... with 1,171 more rows