- tessera
- Interactive plotting for large, complex data
- tmap
- Thematic maps
- oaPlots
- Density-plot-type legends
- dendextend
- Dendrograms
- plotROC
- Interactive ROC plots
23 July 2015
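Tessera consists of two main packages: datadr (dividing data and computing on the pieces) and trelliscope (interactive visualization of the divided data).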
Fundamentally, all data types are stored in a back-end as key/value pairs
# Inspect the housing data frame, then split it into one subset per
# county/state combination. Each statement is on its own line so that it is
# actually executed instead of being swallowed by the preceding comment.

# Look at the housing data.
str(housing)
## 'data.frame': 224369 obs. of 7 variables:
## $ fips : Factor w/ 3235 levels "01001","01003",..: 187 187 187 187 187 187 187 187 187 187 ...
## $ county : Factor w/ 1969 levels "Abbeville County",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ state : Factor w/ 57 levels "AK","AL","AR",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ time : Date, format: "2008-10-01" "2008-11-01" ...
## $ nSold : num NA NA NA NA NA NA NA NA NA NA ...
## $ medListPriceSqft: num 308 299 NA 290 288 ...
## $ medSoldPriceSqft: num 326 NA 318 306 292 ...

# Divide by county and state.
byCounty <- divide(housing, by = c("county", "state"), update = TRUE)
## * Running map/reduce to get missing attributes...

class(byCounty)
## [1] "ddf" "ddo" "kvMemory"

# The first element is a key/value pair: the key encodes the county and
# state, the value holds the remaining columns for that subset.
byCounty[[1]]
## $key
## [1] "county=Abbeville County|state=SC"
##
## $value
## fips time nSold medListPriceSqft medSoldPriceSqft
## 1 45001 2008-10-01 NA 73.06226 NA
## 2 45001 2008-11-01 NA 70.71429 NA
## 3 45001 2008-12-01 NA 70.71429 NA
## 4 45001 2009-01-01 NA 73.43750 NA
## 5 45001 2009-02-01 NA 78.69565 NA
## ...
# Panel function: plots medListPriceSqft and medSoldPriceSqft against time
# for a single subset.
#
# x: a data frame with columns `time`, `medListPriceSqft` and
#    `medSoldPriceSqft` (the value part of one division of byCounty).
# Returns whatever xyplot() returns for that formula.
timePanel <- function(x) {
  xyplot(
    medListPriceSqft + medSoldPriceSqft ~ time,
    data = x,
    auto.key = TRUE,
    ylab = "$ / Sq. Ft."
  )
}

# Test the panel function on one division.
head(byCounty[[1]][[2]], 6)
## fips time nSold medListPriceSqft medSoldPriceSqft
## 1 45001 2008-10-01 NA 73.06226 NA
## 2 45001 2008-11-01 NA 70.71429 NA
## 3 45001 2008-12-01 NA 70.71429 NA
## 4 45001 2009-01-01 NA 73.43750 NA
## 5 45001 2009-02-01 NA 78.69565 NA
## 6 45001 2009-03-01 NA 76.38889 NA

class(byCounty[[1]][[2]])
## [1] "data.frame"

timePanel(byCounty[[1]][[2]])
# Slope of the fitted line of list price over time for one subset.
#
# x: a data frame with columns `medListPriceSqft` and `time`.
# Returns the second coefficient of lm(medListPriceSqft ~ time), i.e. the
# coefficient on `time`, as a named length-one numeric.
lmCoef <- function(x) {
  coef(lm(medListPriceSqft ~ time, data = x))[2]
}

# Per-subset summary values for the list price.
#
# x: a data frame with columns `medListPriceSqft` and `time`.
# Returns a named list with elements `slope`, `meanList`, `listRange` and
# `nObs`, each built with cog(), cogMean() or cogRange().
# NOTE(review): the exact semantics of the cog*() helpers are not visible
# here; confirm against the package that provides them.
priceCog <- function(x) {
  list(
    slope = cog(lmCoef(x), desc = "list price slope"),
    meanList = cogMean(x$medListPriceSqft),
    listRange = cogRange(x$medListPriceSqft),
    nObs = cog(
      sum(!is.na(x$medListPriceSqft)),
      desc = "number of non-NA list prices"
    )
  )
}
# Run priceCog() on the value (second element) of the first key/value pair.
priceCog(byCounty[[1]][[2]])
## $slope
## time
## -0.0002323686
##
## $meanList
## [1] 72.76927
##
## $listRange
## [1] 23.08482
##
## $nObs
## [1] 66
# Set up the "housing_vdb" connection before creating the display.
# autoYes = TRUE presumably answers any confirmation prompt automatically;
# confirm against the vdbConn() documentation.
vdbConn("housing_vdb", autoYes = TRUE)

# Create a display of byCounty using timePanel as the panel function.
# NOTE(review): priceCog() is defined earlier in this file but is not passed
# to makeDisplay() (e.g. as a cognostics function); confirm whether that
# omission is intentional.
makeDisplay(
  byCounty,
  name = "list_sold_vs_time_datadr",
  desc = "List and sold price over time",
  panelFn = timePanel,
  width = 400,
  height = 400,
  lims = list(x = "same")
)

# view() must be a separate statement; on the same line as makeDisplay() it
# is a parse error.
view()
A ggplot-like plotting package for thematic maps:
# Quick thematic maps of the Europe data set shipped with tmap.
# Each call is on its own line; three calls on a single line do not parse.
library(tmap)
data(Europe)

# Default quick map.
qtm(Europe)

# Quick map filled by GDP per capita, labelled with ISO codes.
qtm(
  Europe,
  fill = "gdp_cap_est",
  text = "iso_a3",
  text.size = "AREA",
  root = 5,
  fill.title = "GDP per capita",
  fill.textNA = "Non-European countries",
  theme = "Europe"
)
Plotting with tmap elements
The main plotting method, the equivalent of ggplot2's ggplot, is built from elements whose names start with tm_. The first element is tm_shape, which specifies the shape object. Next, one or a combination of drawing layers (for example tm_fill, tm_borders, tm_bubbles and tm_text, all used below) should be specified:
# The same map built up one layer at a time.

# Step 1: fill only.
tm_shape(Europe) +
  tm_fill(
    "gdp_cap_est",
    textNA = "Non-European countries",
    title = "GDP per capita"
  )

# Step 2: add borders.
tm_shape(Europe) +
  tm_fill(
    "gdp_cap_est",
    textNA = "Non-European countries",
    title = "GDP per capita"
  ) +
  tm_borders()

# Step 3: add text labels.
tm_shape(Europe) +
  tm_fill(
    "gdp_cap_est",
    textNA = "Non-European countries",
    title = "GDP per capita"
  ) +
  tm_borders() +
  tm_text("iso_a3", size = "AREA", root = 5)

# Step 4: add the Europe layout.
tm_shape(Europe) +
  tm_fill(
    "gdp_cap_est",
    textNA = "Non-European countries",
    title = "GDP per capita"
  ) +
  tm_borders() +
  tm_text("iso_a3", size = "AREA", root = 5) +
  tm_layout_Europe()
# Bubble maps of the metro data set, then combined with a Europe density map.
# data(metro) is its own statement; fused with the plot expression on one
# line it does not parse.
data(metro)

# Bubbles sized by 2010 population.
tm_shape(metro) +
  tm_bubbles(
    "pop2010", "red",
    alpha = 0.5,
    size.lim = c(0, 11e6),
    sizes.legend = seq(2e6, 10e6, by = 2e6),
    title.size = "Metropolitan Population"
  )

# Same bubbles with name labels.
tm_shape(metro) +
  tm_bubbles(
    "pop2010", "red",
    alpha = 0.5,
    size.lim = c(0, 11e6),
    sizes.legend = seq(2e6, 10e6, by = 2e6),
    title.size = "Metropolitan Population"
  ) +
  tm_text(
    "name",
    size = "pop2010",
    scale = 1,
    ymod = -.02,
    root = 4,
    size.lowerbound = .60
  )

# Country population density with ISO labels.
tm_shape(Europe) +
  tm_fill(
    "pop_est_dens",
    style = "kmeans",
    textNA = "Non-European countries",
    title = "Country population density (per km2)"
  ) +
  tm_borders() +
  tm_text(
    "iso_a3",
    size = "area",
    scale = 1.5,
    root = 8,
    size.lowerbound = .40,
    fontface = "bold",
    case = NA,
    fontcolor = "gray35"
  )

# Density map and metro bubbles combined in one plot.
tm_shape(Europe) +
  tm_fill(
    "pop_est_dens",
    style = "kmeans",
    textNA = "Non-European countries",
    title = "Country population density (per km2)"
  ) +
  tm_borders() +
  tm_text(
    "iso_a3",
    size = "area",
    scale = 1.5,
    root = 8,
    size.lowerbound = .40,
    fontface = "bold",
    case = NA,
    fontcolor = "gray35"
  ) +
  tm_shape(metro) +
  tm_bubbles(
    "pop2010", "red",
    alpha = 0.5,
    size.lim = c(0, 11e6),
    sizes.legend = seq(2e6, 10e6, by = 2e6),
    title.size = "Metropolitan Population"
  ) +
  tm_layout_Europe()
# Scatter plot with a density-style colour legend using oaPlots.
# Statements are split onto separate lines so that the inline comments no
# longer swallow the code that follows them.
install.packages(
  "oaPlots",
  repos = "http://repos.openanalytics.eu",
  type = "source"
)

library(oaPlots)

# dsub is a subset of the diamonds data frame
# (it is not created in this section and must already exist).

# Define color palette, color vector and color region breaks.
colorPalette <- brewer.pal(9, "Blues")[4:9] # RColorBrewer function
colorObj <- splitColorVar(colorVar = dsub$z, colorPalette) # oaPlots function
colorVec <- colorObj$colorVec
breaks <- colorObj$breaks

# Plot the data.
prepLegend(side = "right", proportion = 0.3) # oaPlots function
oaTemplate(
  xlim = range(dsub$x),
  ylim = range(dsub$y),
  xlab = "X",
  ylab = "Y"
) # oaPlots function
points(x = dsub$x, y = dsub$y, col = colorVec, pch = 19, cex = 0.6)

# Add the legend.
densityLegend(
  x = dsub$z,
  colorPalette = colorPalette,
  side = "right",
  main = "Z",
  colorBreaks = breaks
)
## ## Welcome to dendextend version 1.1.0 ## ## Type ?dendextend to access the overall documentation and ## browseVignettes(package = 'dendextend') for the package vignette. ## You can execute a demo of the package via: demo(dendextend) ## ## More information is available on the dendextend project web-site: ## https://github.com/talgalili/dendextend/ ## ## Contact: <tal.galili@gmail.com> ## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues ## ## To suppress the this message use: ## suppressPackageStartupMessages(library(dendextend)) ## ## ## Attaching package: 'dendextend' ## ## The following object is masked from 'package:datadr': ## ## %>% ## ## The following object is masked from 'package:stats': ## ## cutree
# Build, label, colour and compare dendrograms with dendextend.
# One statement per line: the collapsed original did not parse, and its
# inline comments hid the assignments that followed them.
library(dendextend)

# Cluster the integers 1..5 with average linkage and convert to a dendrogram.
# The method name is spelled out rather than relying on partial matching
# of "ave".
dend <- seq_len(5) %>%
  dist() %>%
  hclust(method = "average") %>%
  as.dendrogram()
plot(dend)

# Labels
labels(dend) <- c("A", "B", "extend", "dend", "C")
# Label colors
labels_colors(dend) <- rainbow(5)
plot(dend)

# Colour the branches by a two-cluster cut, then plot original and sorted.
dend <- color_branches(dend, k = 2)
plot(dend)
dend2 <- sort(dend)
plot(dend2)

# Side-by-side comparison of the two dendrograms.
tanglegram(dend, dend2)
# Load plotROC and start its interactive ROC plotting application.
# The two calls must be separate statements; on one line they do not parse.
library(plotROC)
shiny_plotROC()
# NOTE(review): `Cervantes` is not defined in the visible part of this file;
# presumably a character string created elsewhere. Confirm.
print(Cervantes)
## [1] "Facts are the enemy of truth"