begin note
调用命令:R CMD BATCH D:/RWORKSPACE/CMD_TEST.R (注意 R 和 CMD BATCH 都要大写)
ls(): 列出所有的变量名称
ls(pattern ='v'): 根据pattern匹配
rm('xxx') 删除变量
rm(list=ls()) 删除所有的变量;之后再执行 ls() 会返回 character(0)
集合转数组:
> vector1 <- c(5,9,3)
> vector2 <- c(10,11,12,13,14,15)
> column.names <- c("COL1","COL2","COL3")
> row.names <- c("ROW1","ROW2","ROW3")
> matrix.names <- c("Matrix1","Matrix2")
> result <- array(c(vector1,vector2),dim=c(3,3,2),dimnames = list(column.names,row.names,matrix.names))
> print(result)
, , Matrix1

     ROW1 ROW2 ROW3
COL1    5   10   13
COL2    9   11   14
COL3    3   12   15

, , Matrix2

     ROW1 ROW2 ROW3
COL1    5   10   13
COL2    9   11   14
COL3    3   12   15
> # Print the third row of the second matrix of the array.
> print(result[3,,2])
ROW1 ROW2 ROW3
   3   12   15
>
> # Print the element in the 1st row and 3rd column of the 1st matrix.
> print(result[1,3,1])
[1] 13
>
> # Print the 2nd Matrix.
> print(result[,,2])
     ROW1 ROW2 ROW3
COL1    5   10   13
COL2    9   11   14
COL3    3   12   15
数组的操作:
# Create two vectors of different lengths.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

# Take these vectors as input to the first array.
array1 <- array(c(vector1, vector2), dim = c(3, 3, 2))

# Create two more vectors of different lengths and build the second array
# from them. The original built array2 from vector1/vector2 again, which left
# vector3/vector4 unused and made matrix2 a copy of matrix1.
# The 12 supplied values are recycled by array() to fill the 18 cells.
vector3 <- c(9, 1, 0)
vector4 <- c(6, 0, 11, 3, 14, 1, 2, 6, 9)
array2 <- array(c(vector3, vector4), dim = c(3, 3, 2))

# Create matrices from these arrays (the second 3 x 3 slice of each).
matrix1 <- array1[, , 2]
matrix2 <- array2[, , 2]

# Add the matrices element-wise.
result <- matrix1 + matrix2
print(result)

# Create two vectors of different lengths.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

# Take these vectors as input to the array.
new.array <- array(c(vector1, vector2), dim = c(3, 3, 2))
print(new.array)

# Use apply over margin 1 to sum each row across all the matrices.
result <- apply(new.array, c(1), sum)
print(result)

# Create a character vector as input.
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
print(data)
print(is.factor(data))

# Apply the factor function.
factor_data <- factor(data)
print(factor_data)
print(is.factor(factor_data)) # factor_data is a factor, so this prints TRUE

# Create the vectors for the data frame.
height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(48, 49, 66, 53, 67, 52, 40)
gender <- c("male", "male", "female", "female", "male", "female", "male")

# Create the data frame. stringsAsFactors = TRUE is spelled out so that the
# character column gender is stored as a factor regardless of the R version's
# default; the checks below rely on that.
input_data <- data.frame(height, weight, gender, stringsAsFactors = TRUE)
print(input_data)

# Test if the gender column (a column, not a row, of the data frame) is a
# factor.
print(is.factor(input_data$gender))

# Print the gender column to see the levels.
print(input_data$gender)

data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)

# Create the factor.
factor_data <- factor(data)
print(factor_data)

# Apply the factor function with the required order of the levels; this
# changes the order of the levels, not the values.
new_order_data <- factor(factor_data, levels = c("East", "West", "North"))
print(new_order_data)

# gl(n, k, labels) -- the parameters used are described below:
n 是一个整数,给出级别的数量;k 是一个整数,给出每个级别重复的次数;labels 是所得因子各级别标签的向量。示例:
# Generate a factor with 3 levels, each repeated 4 times, labelled with the
# given city names.
v <- gl(3, 4, labels = c("Tampa", "Seattle", "Boston"))
print(v)

# Creating a data frame
# Create the data frame.
emp.data <- data.frame(
  emp_id = c(1:5),
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  stringsAsFactors = FALSE
)

# Print the data frame, its structure and a per-column summary.
print(emp.data)
str(emp.data)
print(summary(emp.data))

# Extract specific columns of the data frame.
result <- data.frame(emp.data$emp_name, emp.data$salary)
print(result)

# Extract the first two rows with all columns.
result <- emp.data[1:2, ]
print(result)

# Extract rows 3 and 5, columns 2 and 4.
result <- emp.data[c(3, 5), c(2, 4)]
print(result)

# Add the "dept" column.
emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")
v <- emp.data
print(v)

# Create the first data frame.
emp.data <- data.frame(
  emp_id = c(1:5),
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  dept = c("IT", "Operations", "IT", "HR", "Finance"),
  stringsAsFactors = FALSE
)

# Create the second data frame holding the rows to append.
# ("Fianance" in the original was a typo for "Finance".)
emp.newdata <- data.frame(
  emp_id = c(6:8),
  emp_name = c("Rasmi", "Pranab", "Tusar"),
  salary = c(578.0, 722.5, 632.8),
  start_date = as.Date(c("2013-05-21", "2013-07-30", "2014-06-17")),
  dept = c("IT", "Operations", "Finance"),
  stringsAsFactors = FALSE
)

# Bind the two data frames row-wise.
emp.finaldata <- rbind(emp.data, emp.newdata)
print(emp.finaldata)

# Syntax for installing a package from a local file (template, not runnable
# as-is because file_name_with_path is a placeholder):
#   install.packages(file_name_with_path, repos = NULL, type = "source")

# Install the package named "XML" from a local file. The install is skipped
# with a message when the file is absent, so the rest of the script still runs
# on machines without this path.
xml_pkg_path <- "E:/XML_3.98-1.3.zip"
if (file.exists(xml_pkg_path)) {
  install.packages(xml_pkg_path, repos = NULL, type = "source")
} else {
  message("Package file not found, skipping install: ", xml_pkg_path)
}

# Create vector objects.
city <- c("Tampa", "Seattle", "Hartford", "Denver")
state <- c("FL", "WA", "CT", "CO")
# Zip codes are kept as strings: the numeric literal 06161 would be stored as
# 6161 and lose its leading zero.
zipcode <- c("33602", "98104", "06161", "80294")

# Combine above three vectors into one data frame.
# cbind is "column bind": it places its arguments side by side as columns.
# Called on plain vectors as here, cbind() returns a matrix.
addresses <- cbind(city, state, zipcode)

# Print a header. "\n" is the newline escape; the original "/n" printed the
# two characters literally and left the header without a line break.
cat("# # # # The First data frame\n")

# Print the combined addresses.
print(addresses)

# Create another data frame with similar columns.
new.address <- data.frame(
  city = c("Lowry", "Charlotte"),
  state = c("CO", "FL"),
  zipcode = c("80230", "33949"),
  stringsAsFactors = FALSE
)

# Print a header.
cat("# # # The Second data frame\n")

# Print the data frame.
print(new.address)

# Combine rows from both objects. rbind is "row bind": it stacks rows.
all.addresses <- rbind(addresses, new.address)

# Print a header.
cat("# # # The combined data frame\n")

# Print the result.
print(all.addresses)

# melt and cast
熔化和转换
R语言编程中最有趣的地方之一,是通过多个步骤改变数据的形状,以得到所希望的形状。用于完成这种操作的函数是 melt() 和 cast()(由 reshape 包提供,使用前需要先加载该包)。
我们使用 "MASS" 库中名为 ships 的数据集。
# Load MASS, which provides the ships data set, and print it.
library(MASS)
print(ships)

# When the code above is executed, it produces the following result:
     type year period service incidents
1     A   60     60     127         0
2     A   60     75      63         0
3     A   65     60    1095         3
4     A   65     75    1095         4
5     A   70     60    1512         6
..........................
8     A   75     75    2244        11
9     B   60     60   44882        39
10    B   60     75   17176        29
11    B   65     60   28609        58
........................
17    C   60     60    1179         1
18    C   60     75     552         1
19    C   65     60     781         0
........................
融化数据
现在我们融化数据:保留 type 和 year 作为标识列,并把除它们以外的所有列转换为多行。
# Melt ships into long form, keeping type and year as the id columns.
# NOTE(review): melt() is not loaded by any library() call visible in these
# notes; presumably library(reshape) is required first -- confirm.
molten.ships <- melt(ships, id = c("type", "year"))
print(molten.ships)

# When the code above is executed, it produces the following result:
      type year  variable  value
1      A   60    period      60
2      A   60    period      75
3      A   65    period      60
4      A   65    period      75
........................
9      B   60    period      60
10     B   60    period      75
11     B   65    period      60
12     B   65    period      75
13     B   70    period      60
......................
41     A   60    service    127
42     A   60    service     63
43     A   65    service   1095
......................
70     D   70    service   1208
71     D   75    service      0
72     D   75    service   2051
73     E   60    service     45
74     E   60    service      0
75     E   65    service    789
......................
101    C   70    incidents    6
102    C   70    incidents    2
103    C   75    incidents    0
104    C   75    incidents    1
105    D   60    incidents    0
106    D   60    incidents    0
......................
转换数据
我们可以把融化后的数据重新转换成一种新的形式,按每种类型(type)的 ships 在每个年份(year)进行汇总。这是通过使用 cast() 函数完成的。
# Recast the molten data: one row per type/year combination, one column per
# variable, with values aggregated by sum.
# NOTE(review): cast() is not loaded by any library() call visible in these
# notes; presumably library(reshape) is required first -- confirm.
recasted.ship <- cast(molten.ships, type + year ~ variable, sum)
print(recasted.ship)

# When the code above is executed, it produces the following result:
     type year  period  service  incidents
1     A   60    135      190        0
2     A   65    135     2190        7
3     A   70    135     4865       24
4     A   75    135     2244       11
5     B   60    135    62058       68
6     B   65    135    48979      111
7     B   70    135    20163       56
8     B   75    135     7117       18
9     C   60    135     1731        2
10    C   65    135     1457        1
11    C   70    135     2731        8
12    C   75    135      274        1
13    D   60    135      356        0
14    D   65    135      480        0
15    D   70    135     1557       13
16    D   75    135     2051        4
17    E   60    135       45        0
18    E   65    135     1226       14
19    E   70    135     3318       17
20    E   75    135      542        1
读一个CSV文件
以下是 read.csv()函数的一个简单的例子,它读取在当前工作目录的可用的 CSV 文件:
# Read the CSV file from the current working directory and print it.
data <- read.csv("input.csv")
print(data)

# Inspect the result: is it a data frame, and how many columns and rows?
data <- read.csv("input.csv")
print(is.data.frame(data))
print(ncol(data))
print(nrow(data))

# Get the details of the person having the max salary.
retval <- subset(data, salary == max(salary))
print(retval)

# Employees whose salary is above 600 and who work in the IT department.
data <- read.csv("input.csv")
info <- subset(data, salary > 600 & dept == "IT")
print(info)

# Records whose start_date is later than 2014-01-01.
data <- read.csv("input.csv")
retval <- subset(data, as.Date(start_date) > as.Date("2014-01-01"))
print(retval)

# Load the packages required to read XML files.
library("XML")
library("methods")

# Convert the input XML file to a data frame.
xmldataframe <- xmlToDataFrame("input.xml")
print(xmldataframe)

# Load the package required to read JSON files.
library("rjson")

# Give the input file name to the function.
result <- fromJSON(file = "input.json")

# Print the result.
print(result)

# Load the package required to read JSON files.
library("rjson")

# Give the input file name to the function.
result <- fromJSON(file = "input.json")

# Convert the parsed JSON content to a data frame.
json_data_frame <- as.data.frame(result)
print(json_data_frame)

# Create a connection object to the MySQL database.
# We connect to the sample database named "sakila" that comes with the MySQL
# installation.
# NOTE(review): MySQL() and dbConnect() are not loaded by any library() call
# visible in these notes; presumably library(RMySQL) is required -- confirm.
# The argument is spelled `password`; the original `passWord` did not match it.
mysqlconnection <- dbConnect(
  MySQL(),
  user = "root",
  password = "",
  dbname = "sakila",
  host = "localhost"
)

# List the tables available in this database.
dbListTables(mysqlconnection)

# Query the "actor" table to get all the rows.
result <- dbSendQuery(mysqlconnection, "select * from actor")

# Store the result in an R data frame object. n = 5 limits the fetch to the
# first 5 rows. The variable is named data.frame as in the original; the
# original then printed the undefined name data.fame.
data.frame <- fetch(result, n = 5)
print(data.frame)

# Release the pending result set and close the connection.
dbClearResult(result)
dbDisconnect(mysqlconnection)

#' Find the most frequent element (the mode) of a vector.
#'
#' @param v A vector.
#' @return The element of `v` with the highest number of occurrences; when
#'   several elements tie, the one that appears first in `v` is returned
#'   because `which.max()` picks the first maximum.
getmode <- function(v) {
  uniqv <- unique(v)
  # match() maps every element to its index in uniqv; tabulate() counts how
  # often each index occurs.
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

# Create the vector with numbers.
v <- c(2, 1, 2, 3, 1, 2, 3, 4, 1, 5, 5, 3, 2, 3)

# Calculate the mode using the user function.
result <- getmode(v)
print(result)

# Candlestick (K-line) chart:
library(quantmod)

# Download the ^SSEC index data from Yahoo, from 2015-01-01 up to today.
# NOTE(review): the following lines use an object named SSEC that is never
# assigned here; presumably getSymbols() creates it as a side effect --
# confirm.
sse <- getSymbols("^SSEC", from = "2015-1-1", to = Sys.Date(), src = "yahoo")

# Convert the series to monthly bars and show the last few of them.
SSEC.m <- to.monthly(SSEC)
tail(SSEC.m)

# Draw the candlestick chart with the white theme.
candleChart(SSEC.m, theme = "white")
新闻热点
疑难解答