Factores, fechas y caracteres
Índice
1 factor
1.1 Construimos un ejemplo
N <- 100 edad <- sample(seq(18, 40, 1), N, replace=TRUE) summary(edad)
Min. 1st Qu. Median Mean 3rd Qu. Max. 18.00 24.00 27.50 28.35 33.00 40.00
sexo <- sample(c('H', 'M'), N, replace=TRUE) class(sexo) summary(sexo)
[1] "character" Length Class Mode 100 character character
1.2 Una variable cualitativa se define con factor
sexo <- factor(sexo)
class(sexo)
[1] "factor"
summary(sexo)
H M 46 54
levels(sexo)
[1] "H" "M"
nlevels(sexo)
[1] 2
1.3 Los `factor` sirven para agrupar
- Con la función `table`
table(edad > 30, sexo)
sexo H M FALSE 29 37 TRUE 17 17
table(edad %in% 20:30, sexo)
sexo H M FALSE 21 20 TRUE 25 34
1.4 Los `factor` sirven para agrupar
- Con `tapply` o `aggregate`
tapply(edad, sexo, mean)
H M 28.97826 27.81481
aggregate(edad ~ sexo, FUN=median)
sexo edad 1 H 29 2 M 27
1.5 Los factores sirven para separar
edadSexo <- split(edad, sexo)
class(edadSexo)
[1] "list"
sapply(edadSexo, mean)
H M 28.97826 27.81481
1.6 Los `factor` se pueden generar a partir de variables numéricas
- Por ejemplo, con `cut`
gEdad <- cut(edad, breaks=4)
class(gEdad)
[1] "factor"
levels(gEdad)
[1] "(18,23.5]" "(23.5,29]" "(29,34.5]" "(34.5,40]"
- Nuevamente, con `table`
table(gEdad)
gEdad (18,23.5] (23.5,29] (29,34.5] (34.5,40] 22 41 16 21
table(gEdad, sexo)
sexo gEdad H M (18,23.5] 10 12 (23.5,29] 16 25 (29,34.5] 8 8 (34.5,40] 12 9
2 Fechas
2.1 Date
as.Date('2013-02-06')
[1] "2013-02-06"
as.Date('2013/02/06')
[1] "2013-02-06"
as.Date('06.02.2013')
Error in charToDate(x) : character string is not in a standard unambiguous format
as.Date('06.02.2013', format='%d.%m.%Y')
[1] "2013-02-06"
as.Date(37, origin='2013-01-01')
[1] "2013-02-07"
2.2 Secuencias temporales con Date
seq(as.Date('2004-01-01'), by='day', length=10)
[1] "2004-01-01" "2004-01-02" "2004-01-03" "2004-01-04" "2004-01-05" [6] "2004-01-06" "2004-01-07" "2004-01-08" "2004-01-09" "2004-01-10"
seq(as.Date('2004-01-01'), by='month', length=10)
[1] "2004-01-01" "2004-02-01" "2004-03-01" "2004-04-01" "2004-05-01" [6] "2004-06-01" "2004-07-01" "2004-08-01" "2004-09-01" "2004-10-01"
seq(as.Date('2004-01-01'), by='10 day', length=10)
[1] "2004-01-01" "2004-01-11" "2004-01-21" "2004-01-31" "2004-02-10" [6] "2004-02-20" "2004-03-01" "2004-03-11" "2004-03-21" "2004-03-31"
2.3 POSIXct
help(format.POSIXct)
as.POSIXct('2013-02-06')
[1] "2013-02-06 CET"
ISOdate(2013, 2, 7)
[1] "2013-02-07 12:00:00 GMT"
hoy <- as.POSIXct('2013-02-06') format(hoy, '%Y')
[1] "2013"
format(hoy, '%d')
[1] "06"
format(hoy, '%m')
[1] "02"
format(hoy, '%b')
[1] "feb"
format(hoy, '%d de %B de %Y')
[1] "06 de febrero de 2013"
2.4 POSIXct
hora <- Sys.time()
hora
[1] "2015-05-07 12:27:21 CEST"
format(hora, '%H:%M:%S')
[1] "12:27:21"
format(hora, '%H horas, %M minutos y %S segundos')
[1] "12 horas, 27 minutos y 21 segundos"
2.5 Secuencias temporales con POSIXct
seq(as.POSIXct('2004-01-01'), by='month', length=10)
[1] "2004-01-01 CET" "2004-02-01 CET" "2004-03-01 CET" "2004-04-01 CEST" [5] "2004-05-01 CEST" "2004-06-01 CEST" "2004-07-01 CEST" "2004-08-01 CEST" [9] "2004-09-01 CEST" "2004-10-01 CEST"
seq(as.POSIXct('2004-01-01 10:00:00'), by='15 min', length=10)
[1] "2004-01-01 10:00:00 CET" "2004-01-01 10:15:00 CET" [3] "2004-01-01 10:30:00 CET" "2004-01-01 10:45:00 CET" [5] "2004-01-01 11:00:00 CET" "2004-01-01 11:15:00 CET" [7] "2004-01-01 11:30:00 CET" "2004-01-01 11:45:00 CET" [9] "2004-01-01 12:00:00 CET" "2004-01-01 12:15:00 CET"
2.6 Zonas horarias
as.POSIXct('2013-02-06 15:30:00', tz='GMT')
[1] "2013-02-06 15:30:00 GMT"
as.POSIXct('2013-02-06 15:30:00', tz='Europe/Madrid')
[1] "2013-02-06 15:30:00 CET"
hawaii <- as.POSIXct('2013-02-06 15:30:00', tz='HST') ## Character format(hawaii, tz='GMT')
[1] "2013-02-07 01:30:00"
## POSIXct as.POSIXct(format(hawaii, tz='GMT'), tz='GMT')
[1] "2013-02-07 01:30:00 GMT"
3 Caracteres
3.1 Bastan unas simples comillas
cadena <- "Hola mundo" class(cadena)
[1] "character"
nchar(cadena)
[1] 10
3.2 Un vector de character
cadenaVec <- c("Hola mundo", "Hello world") nchar(cadenaVec)
[1] 10 11
length(cadenaVec)
[1] 2
cadenaVec[1]
[1] "Hola mundo"
3.3 Para mostrarlos usamos `cat` o `print`
a <- 2 b <- 3
cat('La suma de', a, 'y', b, 'es', a + b, fill=TRUE)
La suma de 2 y 3 es 5
cat('La suma de', a, 'y', b, 'es', a + b, '\n', 'La multiplicación de', a, 'por', b, 'es', a*b, '\n')
La suma de 2 y 3 es 5 La multiplicación de 2 por 3 es 6
3.4 Los `character` se pueden unir…
paste('Hello', 'World', sep='_')
[1] "Hello_World"
paste('X', 1:5, sep='.')
[1] "X.1" "X.2" "X.3" "X.4" "X.5"
paste(c('A', 'B'), 1:5, sep='.')
[1] "A.1" "B.2" "A.3" "B.4" "A.5"
paste(c('A', 'B'), 1:5, sep='.', collapse='|')
[1] "A.1|B.2|A.3|B.4|A.5"
3.5 … y también se pueden separar…
strsplit(cadenaVec, split=' ')
[[1]] [1] "Hola" "mundo" [[2]] [1] "Hello" "world"
strsplit(cadenaVec, split='')
[[1]] [1] "H" "o" "l" "a" " " "m" "u" "n" "d" "o" [[2]] [1] "H" "e" "l" "l" "o" " " "w" "o" "r" "l" "d"
chSep <- strsplit(cadenaVec, split=' ') class(chSep)
[1] "list"
length(chSep)
[1] 2
sapply(chSep, nchar)
[,1] [,2] [1,] 4 5 [2,] 5 5
3.6 … y, por supuesto, manipular
sub('o', '0', 'Hola Mundo')
[1] "H0la Mundo"
gsub('o', '0', 'Hola Mundo')
[1] "H0la Mund0"
substring(cadena, 1) <- 'HOLA' cadena
[1] "HOLA mundo"
tolower(cadena)
[1] "hola mundo"
toupper(cadena)
[1] "HOLA MUNDO"