################################################################## ## Initial configuration ################################################################## ## Clone or download the repository and set the working directory ## with setwd to the folder where the repository is located. ################################################################## ## Air Quality in Madrid ################################################################## ## codeStations.csv is extracted from the document ## http://www.mambiente.munimadrid.es/opencms/export/sites/default/calaire/Anexos/INTPHORA-DIA.pdf, ## table of page 3. codEstaciones <- read.csv2('data/codeStations.csv') codURL <- as.numeric(substr(codEstaciones$Codigo, 7, 8)) ## The information of each measuring station is available at its own webpage, defined by codURL URLs <- paste('http://www.mambiente.munimadrid.es/opencms/opencms/calaire/contenidos/estaciones/estacion', codURL, '.html', sep = '') ################################################################## ## Data arrangement ################################################################## library(XML) library(sp) ## Access each webpage, retrieve tables and extract long/lat data coords <- lapply(URLs, function(est){ tables <- readHTMLTable(est) location <- tables[[2]] ## Clean the table content and convert to dms format ub2dms <- function(x){ ch <- as.character(x) ch <- sub(',', '.', ch) ch <- sub('O', 'W', ch) ## Some stations use "O" instead of "W" as.numeric(char2dms(ch, "º", "'", "'' ")) } long <- ub2dms(location[2,1]) lat <- ub2dms(location[2,2]) alt <- as.numeric(sub(' m.', '', location[2, 3])) coords <- data.frame(long = long, lat = lat, alt = alt) coords }) airStations <- cbind(codEstaciones, do.call(rbind, coords)) ## The longitude of "El Pardo" station is wrong (positive instead of negative) airStations$long[22] <- -airStations$long[22] write.csv2(airStations, file = 'data/airStations.csv') rawData <- readLines('data/Datos11.txt') ## This loop reads each line and extracts fields as defined by the ## INTPHORA file: ## http://www.mambiente.munimadrid.es/opencms/export/sites/default/calaire/Anexos/INTPHORA-DIA.pdf datos11 <- lapply(rawData, function(x){ codEst <- substr(x, 1, 8) codParam <- substr(x, 9, 10) codTec <- substr(x, 11, 12) codPeriod <- substr(x, 13, 14) month <- substr(x, 17, 18) dat <- substr(x, 19, nchar(x)) ## "N" used for impossible days (31st April) idxN <- gregexpr('N', dat)[[1]] if (idxN==-1) idxN <- numeric(0) nZeroDays <- length(idxN) day <- seq(1, 31-nZeroDays) ## Substitute V and N with ";" to split data from different days dat <- gsub('[VN]+', ';', dat) dat <- as.numeric(strsplit(dat, ';')[[1]]) ## Only data from valid days dat <- dat[day] res <- data.frame(codEst, codParam, ##codTec, codPeriod, month, day, year = 2016, dat) }) datos11 <- do.call(rbind, datos11) write.csv2(datos11, 'data/airQuality.csv') ################################################################## ## Combine data and spatial locations ################################################################## library(sp) ## Spatial location of stations airStations <- read.csv2('data/airStations.csv') coordinates(airStations) <- ~ long + lat ## Geographical projection proj4string(airStations) <- CRS("+proj=longlat +ellps=WGS84 +datum=WGS84") ## Measurements data airQuality <- read.csv2('data/airQuality.csv') ## Only interested in NO2 NO2 <- airQuality[airQuality$codParam==8, ] NO2agg <- aggregate(dat ~ codEst, data = NO2, FUN = function(x) { c(mean = signif(mean(x), 3), median = median(x), sd = signif(sd(x), 3)) }) NO2agg <- do.call(cbind, NO2agg) NO2agg <- as.data.frame(NO2agg) library(rgdal) library(maptools) ## Link aggregated data with stations to obtain a SpatialPointsDataFrame. ## Codigo and codEst are the stations codes idxNO2 <- match(airStations$Codigo, NO2agg$codEst) NO2sp <- spCbind(airStations[, c('Nombre', 'alt')], NO2agg[idxNO2, ]) ## Save the result writeOGR(NO2sp, dsn = 'data/', layer = 'NO2sp', driver = 'ESRI Shapefile') ################################################################## ## Photographs of the stations ################################################################## library(XML) old <- setwd('images') for (i in 1:nrow(NO2df)) { codEst <- NO2df[i, "codEst"] ## Webpage of each station codURL <- as.numeric(substr(codEst, 7, 8)) rootURL <- 'http://www.mambiente.munimadrid.es' stationURL <- paste(rootURL, '/opencms/opencms/calaire/contenidos/estaciones/estacion', codURL, '.html', sep = '') content <- htmlParse(stationURL, encoding = 'utf8') ## Extracted with http://www.selectorgadget.com/ xPath <- '//*[contains(concat( " ", @class, " " ), concat( " ", "imagen_1", " " ))]' imageStation <- getNodeSet(content, xPath)[[1]] imageURL <- xmlAttrs(imageStation)[1] imageURL <- paste(rootURL, imageURL, sep = '') download.file(imageURL, destfile = paste(codEst, '.jpg', sep = '')) } setwd(old) ################################################################## ## Spanish General Elections ################################################################## dat2016 <- read.csv('data/GeneralSpanishElections2016.csv') census <- dat2016$Total.censo.electoral validVotes <- dat2016$Votos.válidos ## Election results per political party and municipality votesData <- dat2016[, -(1:13)] ## Abstention as an additional party votesData$ABS <- census - validVotes ## UP is a coalition of several parties UPcols <- grep("PODEMOS|ECP", names(votesData)) votesData$UP <- rowSums(votesData[, UPcols]) votesData[, UPcols] <- NULL ## Winner party at each municipality whichMax <- apply(votesData, 1, function(x)names(votesData)[which.max(x)]) ## Results of the winner party at each municipality Max <- apply(votesData, 1, max) ## OTH for everything but PP, PSOE, UP, Cs, and ABS whichMax[!(whichMax %in% c('PP', 'PSOE', 'UP', 'C.s', 'ABS'))] <- 'OTH' ## Percentage of votes with the electoral census pcMax <- Max/census * 100 ## Province-Municipality code. sprintf formats a number with leading zeros. PROVMUN <- with(dat2016, paste(sprintf('%02d', Código.de.Provincia), sprintf('%03d', Código.de.Municipio), sep="")) votes2016 <- data.frame(PROVMUN, whichMax, Max, pcMax) write.csv(votes2016, 'data/votes2016.csv', row.names = FALSE) ################################################################## ## Administrative boundaries ################################################################## library(sp) library(rgdal) old <- setwd(tempdir()) download.file('ftp://www.ine.es/pcaxis/mapas_completo_municipal.rar', 'mapas_completo_municipal.rar') system2('unrar', c('e', 'mapas_completo_municipal.rar')) spMap <- readOGR("esp_muni_0109.shp", p4s = "+proj=utm +zone=30 +ellps=GRS80 +units=m +no_defs") Encoding(levels(spMap$NOMBRE)) <- "latin1" setwd(old) ## dissolve repeated polygons spPols <- unionSpatialPolygons(spMap, spMap$PROVMUN) votes2016 <- read.csv('data/votes2016.csv', colClasses = c('factor', 'factor', 'numeric', 'numeric')) ## Match polygons and data using ID slot and PROVMUN column IDs <- sapply(spPols@polygons, function(x)x@ID) idx <- match(IDs, votes2016$PROVMUN) ##Places without information idxNA <- which(is.na(idx)) ##Information to be added to the SpatialPolygons object dat2add <- votes2016[idx, ] ## SpatialPolygonsDataFrame uses row names to match polygons with data row.names(dat2add) <- IDs spMapVotes <- SpatialPolygonsDataFrame(spPols, dat2add) ## Drop those places without information spMapVotes0 <- spMapVotes[-idxNA, ] ## Save the result writeOGR(spMapVotes0, dsn = 'data/', layer = 'spMapVotes0', drive = 'ESRI Shapefile') ## Extract Canarias islands from the SpatialPolygons object canarias <- sapply(spMapVotes0@polygons, function(x)substr(x@ID, 1, 2) %in% c("35", "38")) peninsula <- spMapVotes0[!canarias,] island <- spMapVotes0[canarias,] ## Shift the island extent box to position them at the bottom right corner dy <- bbox(peninsula)[2,1] - bbox(island)[2,1] dx <- bbox(peninsula)[1,2] - bbox(island)[1,2] island2 <- elide(island, shift = c(dx, dy)) bbIslands <- bbox(island2) proj4string(island2) <- proj4string(peninsula) ## Bind Peninsula (without islands) with shifted islands spMapVotes <- rbind(peninsula, island2) ## Save the result writeOGR(spMapVotes, dsn = 'data/', layer = 'spMapVotes', drive = 'ESRI Shapefile') ################################################################## ## CM SAF ################################################################## library(raster) tmp <- tempdir() unzip('data/SISmm2008_CMSAF.zip', exdir = tmp) filesCMSAF <- dir(tmp, pattern = 'SISmm') SISmm <- stack(paste(tmp, filesCMSAF, sep = '/')) ## CM-SAF data is average daily irradiance (W/m2). Multiply by 24 ## hours to obtain daily irradiation (Wh/m2) SISmm <- SISmm * 24 ## Monthly irradiation: each month by the corresponding number of days daysMonth <- c(31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31) SISm <- SISmm * daysMonth / 1000 ## kWh/m2 ## Annual average SISav <- sum(SISm)/sum(daysMonth) writeRaster(SISav, file = 'SISav') library(raster) ## http://neo.sci.gsfc.nasa.gov/Search.html?group=64 pop <- raster('875430rgb-167772161.0.FLOAT.TIFF') ## http://neo.sci.gsfc.nasa.gov/Search.html?group=20 landClass <- raster('241243rgb-167772161.0.TIFF')