- tessera
- Interactive plotting for large, complex data
- tmap
- Thematic maps
- oaPlots
- Density-plot-type legends
- dendextend
- Dendrograms
- plotROC
- Interactive ROC plots
23 July 2015
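Tessera consists of two main packages: datadr (divide-and-recombine data handling) and Trelliscope (interactive visualization of the resulting subsets).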
Fundamentally, all data types are stored in a back-end as key/value pairs
# Look at the housing data.
# The call must sit on its own line; when it trails the comment it is
# part of the comment and never runs.
str(housing)
## 'data.frame': 224369 obs. of 7 variables:
## $ fips : Factor w/ 3235 levels "01001","01003",..: 187 187 187 187 187 187 187 187 187 187 ...
## $ county : Factor w/ 1969 levels "Abbeville County",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ state : Factor w/ 57 levels "AK","AL","AR",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ time : Date, format: "2008-10-01" "2008-11-01" ...
## $ nSold : num NA NA NA NA NA NA NA NA NA NA ...
## $ medListPriceSqft: num 308 299 NA 290 288 ...
## $ medSoldPriceSqft: num 326 NA 318 306 292 ...
# Split the housing data into one subset per county/state combination.
byCounty <- divide(
  housing,
  by = c("county", "state"),
  update = TRUE
)
## * Running map/reduce to get missing attributes...

# Classes carried by the divided object.
class(byCounty)
## [1] "ddf" "ddo" "kvMemory"

# First subset: a key naming the county/state plus the value holding its rows.
byCounty[[1]]
## $key
## [1] "county=Abbeville County|state=SC"
##
## $value
## fips time nSold medListPriceSqft medSoldPriceSqft
## 1 45001 2008-10-01 NA 73.06226 NA
## 2 45001 2008-11-01 NA 70.71429 NA
## 3 45001 2008-12-01 NA 70.71429 NA
## 4 45001 2009-01-01 NA 73.43750 NA
## 5 45001 2009-02-01 NA 78.69565 NA
## ...
# Panel function: list and sold price per square foot plotted against time.
#   x: data frame with columns medListPriceSqft, medSoldPriceSqft and time.
# Returns whatever xyplot() returns for that subset.
timePanel <- function(x) {
  xyplot(
    medListPriceSqft + medSoldPriceSqft ~ time,
    data = x,
    auto.key = TRUE,
    ylab = "$ / Sq. Ft."
  )
}
# Try the panel function on a single subset before building the display.
# The second element of a key/value pair is the value (the subset's rows).
head(byCounty[[1]][[2]], n = 6)
## fips time nSold medListPriceSqft medSoldPriceSqft
## 1 45001 2008-10-01 NA 73.06226 NA
## 2 45001 2008-11-01 NA 70.71429 NA
## 3 45001 2008-12-01 NA 70.71429 NA
## 4 45001 2009-01-01 NA 73.43750 NA
## 5 45001 2009-02-01 NA 78.69565 NA
## 6 45001 2009-03-01 NA 76.38889 NA

# The value is an ordinary data frame, which is what timePanel() expects.
class(byCounty[[1]][[2]])
## [1] "data.frame"

timePanel(byCounty[[1]][[2]])
# Slope of the least-squares line of list price against time.
#   x: data frame with columns medListPriceSqft and time.
# Returns a length-one numeric named "time": the second coefficient of the
# fit, i.e. the slope (the first is the intercept).
lmCoef <- function(x) {
  fit <- lm(medListPriceSqft ~ time, data = x)
  coef(fit)[2]
}
# Cognostics (per-subset summary metrics) for one county's data frame.
#   x: data frame with columns medListPriceSqft and time.
# Returns a named list with four entries:
#   slope     - fitted list-price slope from lmCoef()
#   meanList  - cogMean() of the list price
#   listRange - cogRange() of the list price
#   nObs      - count of non-missing list prices
priceCog <- function(x) {
  listPrice <- x$medListPriceSqft
  list(
    slope = cog(lmCoef(x), desc = "list price slope"),
    meanList = cogMean(listPrice),
    listRange = cogRange(listPrice),
    nObs = cog(
      sum(!is.na(listPrice)),
      desc = "number of non-NA list prices"
    )
  )
}
# Check the cognostics function on a single subset.
priceCog(byCounty[[1]][[2]])
## $slope
## time
## -0.0002323686
##
## $meanList
## [1] 72.76927
##
## $listRange
## [1] 23.08482
##
## $nObs
## [1] 66

# Connect to the visualization database that will hold the display.
vdbConn("housing_vdb", autoYes = TRUE)

# Build one panel per county using timePanel().
# NOTE(review): priceCog is defined and tested above but is not passed to
# makeDisplay(); add `cogFn = priceCog` if the cognostics are meant to be
# part of this display.
makeDisplay(byCounty,
  name = "list_sold_vs_time_datadr",
  desc = "List and sold price over time",
  panelFn = timePanel,
  width = 400, height = 400,
  lims = list(x = "same")
)

# Separate statement: two calls on one line do not parse in R.
view()
A ggplot-like plotting package for thematic maps:
# Each statement needs its own line; several calls on one line do not parse.
library(tmap)
data(Europe)

# Quick thematic map using only defaults.
qtm(Europe)

# The same quick-map call with fill, text labels and titles specified.
qtm(Europe,
  fill = "gdp_cap_est",
  text = "iso_a3",
  text.size = "AREA",
  root = 5,
  fill.title = "GDP per capita",
  fill.textNA = "Non-European countries",
  theme = "Europe"
)
Plotting with tmap elements
The main plotting method, the equivalent to ggplot2's ggplot, consists of elements that start with tm_. The first element to start with is tm_shape, which specifies the shape object. Next, one, or a combination of the following drawing layers should be specified:
# Step 1: shape plus a fill layer.
tm_shape(Europe) +
  tm_fill("gdp_cap_est",
    textNA = "Non-European countries",
    title = "GDP per capita"
  )

# Step 2: add borders.
tm_shape(Europe) +
  tm_fill("gdp_cap_est",
    textNA = "Non-European countries",
    title = "GDP per capita"
  ) +
  tm_borders()

# Step 3: add text labels.
tm_shape(Europe) +
  tm_fill("gdp_cap_est",
    textNA = "Non-European countries",
    title = "GDP per capita"
  ) +
  tm_borders() +
  tm_text("iso_a3", size = "AREA", root = 5)

# Step 4: add the Europe layout element.
tm_shape(Europe) +
  tm_fill("gdp_cap_est",
    textNA = "Non-European countries",
    title = "GDP per capita"
  ) +
  tm_borders() +
  tm_text("iso_a3", size = "AREA", root = 5) +
  tm_layout_Europe()
data(metro)

# Bubbles sized by the pop2010 column.
tm_shape(metro) +
  tm_bubbles("pop2010", "red",
    alpha = 0.5,
    size.lim = c(0, 11e6),
    sizes.legend = seq(2e6, 10e6, by = 2e6),
    title.size = "Metropolitan Population"
  )

# Same bubbles with name labels added.
tm_shape(metro) +
  tm_bubbles("pop2010", "red",
    alpha = 0.5,
    size.lim = c(0, 11e6),
    sizes.legend = seq(2e6, 10e6, by = 2e6),
    title.size = "Metropolitan Population"
  ) +
  tm_text("name",
    size = "pop2010", scale = 1, ymod = -0.02, root = 4,
    size.lowerbound = 0.60
  )

# Country fill by population density, with borders and labels.
tm_shape(Europe) +
  tm_fill("pop_est_dens",
    style = "kmeans",
    textNA = "Non-European countries",
    title = "Country population density (per km2)"
  ) +
  tm_borders() +
  tm_text("iso_a3",
    size = "area", scale = 1.5, root = 8, size.lowerbound = 0.40,
    fontface = "bold", case = NA, fontcolor = "gray35"
  )

# Both shapes in one map: country layers first, then the metro bubbles.
tm_shape(Europe) +
  tm_fill("pop_est_dens",
    style = "kmeans",
    textNA = "Non-European countries",
    title = "Country population density (per km2)"
  ) +
  tm_borders() +
  tm_text("iso_a3",
    size = "area", scale = 1.5, root = 8, size.lowerbound = 0.40,
    fontface = "bold", case = NA, fontcolor = "gray35"
  ) +
  tm_shape(metro) +
  tm_bubbles("pop2010", "red",
    alpha = 0.5,
    size.lim = c(0, 11e6),
    sizes.legend = seq(2e6, 10e6, by = 2e6),
    title.size = "Metropolitan Population"
  ) +
  tm_layout_Europe()
install.packages(
  "oaPlots",
  repos = "http://repos.openanalytics.eu",
  type = "source"
)
library(oaPlots)

# dsub is a subset of the diamonds data frame

# Define the color palette, the per-point color vector and the color breaks.
colorPalette <- brewer.pal(9, "Blues")[4:9] # RColorBrewer function
colorObj <- splitColorVar(colorVar = dsub$z, colorPalette) # oaPlots function
colorVec <- colorObj$colorVec
breaks <- colorObj$breaks

# Plot the data, reserving the right-hand side of the device for the legend.
prepLegend(side = "right", proportion = 0.3) # oaPlots function
oaTemplate( # oaPlots function
  xlim = range(dsub$x),
  ylim = range(dsub$y),
  xlab = "X",
  ylab = "Y"
)
points(x = dsub$x, y = dsub$y, col = colorVec, pch = 19, cex = 0.6)

# Add the density-style legend for z, using the same palette and breaks.
densityLegend(
  x = dsub$z,
  colorPalette = colorPalette,
  side = "right",
  main = "Z",
  colorBreaks = breaks
)
## ## Welcome to dendextend version 1.1.0 ## ## Type ?dendextend to access the overall documentation and ## browseVignettes(package = 'dendextend') for the package vignette. ## You can execute a demo of the package via: demo(dendextend) ## ## More information is available on the dendextend project web-site: ## https://github.com/talgalili/dendextend/ ## ## Contact: <tal.galili@gmail.com> ## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues ## ## To suppress the this message use: ## suppressPackageStartupMessages(library(dendextend)) ## ## ## Attaching package: 'dendextend' ## ## The following object is masked from 'package:datadr': ## ## %>% ## ## The following object is masked from 'package:stats': ## ## cutree
library(dendextend)

# Build a small dendrogram: distances between 1..5, average-linkage
# clustering, then conversion to a dendrogram object.
# The linkage name is spelled out instead of relying on partial matching.
dend <- 1:5 %>%
  dist %>%
  hclust(method = "average") %>%
  as.dendrogram
plot(dend)

# Labels
labels(dend) <- c("A", "B", "extend", "dend", "C")
# Label colors
labels_colors(dend) <- rainbow(5)
plot(dend)

# Color the branches by a two-cluster cut, then plot.
# Each statement is on its own line; several on one line do not parse.
dend <- color_branches(dend, k = 2)
plot(dend)

# Sorted copy of the dendrogram, for comparison.
dend2 <- sort(dend)
plot(dend2)

# The original and the sorted dendrogram drawn facing each other.
tanglegram(dend, dend2)
# Load plotROC, then start its interactive ROC plotting app.
# The two calls must be separate statements to parse.
library(plotROC)
shiny_plotROC()
print(Cervantes)
## [1] "Facts are the enemy of truth"