R 笔记

2019-11-10 20:08:49

字体：大中小

来源：转载

供稿：网友

begin note

调用命令：R CMD BATCH D:/RWORKSPACE/CMD_TEST.R （注意 CMD BATCH 都要大写；在区分大小写的系统上，可执行文件名也必须写作大写的 R）

ls(): 列出所有的变量名称

ls(pattern ='v'): 根据pattern匹配

rm('xxx') 删除变量

rm(list=ls()) 删除所有的变量；之后再执行 ls() 会返回 character(0)

集合转数组：

# Build a 3 x 3 x 2 array from two vectors and label its dimensions.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)
column.names <- c("COL1", "COL2", "COL3")
row.names <- c("ROW1", "ROW2", "ROW3")
matrix.names <- c("Matrix1", "Matrix2")

# Only nine values are supplied for 18 cells, so array() recycles them and
# both 3 x 3 layers end up identical.
# NOTE: the first element of `dimnames` labels the rows, so the rows are
# labelled COL1..COL3 and the columns ROW1..ROW3 (see the output below).
result <- array(
  c(vector1, vector2),
  dim = c(3, 3, 2),
  dimnames = list(column.names, row.names, matrix.names)
)

# R is case-sensitive: the function is print(), not PRint().
print(result)
#> , , Matrix1
#>
#>      ROW1 ROW2 ROW3
#> COL1    5   10   13
#> COL2    9   11   14
#> COL3    3   12   15
#>
#> , , Matrix2
#>
#>      ROW1 ROW2 ROW3
#> COL1    5   10   13
#> COL2    9   11   14
#> COL3    3   12   15

# Third row of the second matrix of the array.
print(result[3, , 2])
#> ROW1 ROW2 ROW3
#>    3   12   15

# Element in the 1st row and 3rd column of the 1st matrix.
print(result[1, 3, 1])
#> [1] 13

# The whole 2nd matrix.
print(result[, , 2])
#>      ROW1 ROW2 ROW3
#> COL1    5   10   13
#> COL2    9   11   14
#> COL3    3   12   15

# Array operations:

# --- Arithmetic on array slices ---------------------------------------------

# Two vectors of different lengths fill the first 3 x 3 x 2 array.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)
array1 <- array(c(vector1, vector2), dim = c(3, 3, 2))

# A second pair of vectors fills the second array.
vector3 <- c(9, 1, 0)
vector4 <- c(6, 0, 11, 3, 14, 1, 2, 6, 9)
# Fixed: array2 used to be built from vector1/vector2 again, which left
# vector3/vector4 unused and made matrix2 an exact copy of matrix1.
# The 12 values are recycled to fill the 18 cells.
array2 <- array(c(vector3, vector4), dim = c(3, 3, 2))

# Take the second 3 x 3 layer of each array.
matrix1 <- array1[, , 2]
matrix2 <- array2[, , 2]

# Add the matrices element-wise.
result <- matrix1 + matrix2
print(result)

# --- apply() across an array --------------------------------------------------

vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)
new.array <- array(c(vector1, vector2), dim = c(3, 3, 2))
print(new.array)

# Sum each row across all the matrices (margin 1 = rows).
result <- apply(new.array, 1, sum)
print(result)

# --- Factors ------------------------------------------------------------------

# A plain character vector is not a factor.
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
print(data)
print(is.factor(data))

# After factor() it is a factor, so this prints TRUE.
factor_data <- factor(data)
print(factor_data)
print(is.factor(factor_data))

# --- Factors inside a data frame ----------------------------------------------

height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(48, 49, 66, 53, 67, 52, 40)
gender <- c("male", "male", "female", "female", "male", "female", "male")

# stringsAsFactors = TRUE is spelled out because the default became FALSE in
# R 4.0; without it the character column stays character and the check below
# prints FALSE.
input_data <- data.frame(height, weight, gender, stringsAsFactors = TRUE)
print(input_data)

# The gender column (not a row) of the data frame is a factor.
print(is.factor(input_data$gender))

# Print the gender column to see its levels.
print(input_data$gender)

# --- Changing the order of levels ---------------------------------------------

data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
factor_data <- factor(data)
print(factor_data)

# Re-create the factor with an explicit level order (East, West, North).
new_order_data <- factor(factor_data, levels = c("East", "West", "North"))
print(new_order_data)

# Syntax: gl(n, k, labels) -- the parameters are described below:
n 是一个整数，给出级别数。
k 是一个整数，给出每个级别重复的次数。
labels 是所得因子各级别标签的向量。
示例
# Generate a factor with 3 levels, each repeated 4 times in a row, and use
# the city names as the level labels.
v <- gl(n = 3, k = 4, labels = c("Tampa", "Seattle", "Boston"))
print(v)
创建数据帧
# Build the employee table; strings are kept as character columns.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  stringsAsFactors = FALSE
)
print(emp.data)

# Show the structure of the data frame and a per-column summary.
str(emp.data)
print(summary(emp.data))

# Extract specific columns into a new data frame.
result <- data.frame(emp.data$emp_name, emp.data$salary)
print(result)

# First two rows, all columns.
result <- emp.data[1:2, ]
print(result)

# Rows 3 and 5, columns 2 and 4.
result <- emp.data[c(3, 5), c(2, 4)]
print(result)

# Add a "dept" column.
emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")
v <- emp.data
print(v)
# Create the first data frame.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(c(
    "2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27"
  )),
  dept = c("IT", "Operations", "IT", "HR", "Finance"),
  stringsAsFactors = FALSE
)

# Create the second data frame holding the rows to append.
# Fixed: the last department was misspelled "Fianance", which would have
# produced a spurious extra department value in the combined data.
emp.newdata <- data.frame(
  emp_id = 6:8,
  emp_name = c("Rasmi", "Pranab", "Tusar"),
  salary = c(578.0, 722.5, 632.8),
  start_date = as.Date(c("2013-05-21", "2013-07-30", "2014-06-17")),
  dept = c("IT", "Operations", "Finance"),
  stringsAsFactors = FALSE
)

# Bind the two data frames row-wise; both have the same columns.
emp.finaldata <- rbind(emp.data, emp.newdata)
print(emp.finaldata)

# Installing a package from a local file.
# Usage (template, not runnable as written because file_name_with_path is a
# placeholder):
#   install.packages(file_name_with_path, repos = NULL, type = "source")

# Install the "XML" package from a local archive.
install.packages("E:/XML_3.98-1.3.zip", repos = NULL, type = "source")
# Create vector objects.
city <- c("Tampa", "Seattle", "Hartford", "Denver")
state <- c("FL", "WA", "CT", "CO")
# Zip codes are identifiers, not numbers: as a numeric literal 06161 becomes
# 6161 and loses its leading zero, so they are stored as strings. This also
# matches the character zipcode column of the second data frame below.
zipcode <- c("33602", "98104", "06161", "80294")

# cbind() binds the vectors as columns. On plain vectors it returns a
# character matrix, so the result is wrapped in data.frame() to really get
# the data frame that the header below announces.
addresses <- data.frame(cbind(city, state, zipcode), stringsAsFactors = FALSE)

# Print a header ("\n" is the newline escape; "/n" printed a literal "/n").
cat("# # # # The First data frame\n")

# Print the data frame.
print(addresses)

# Create another data frame with the same columns.
new.address <- data.frame(
  city = c("Lowry", "Charlotte"),
  state = c("CO", "FL"),
  zipcode = c("80230", "33949"),
  stringsAsFactors = FALSE
)

# Print a header.
cat("# # # The Second data frame\n")

# Print the data frame.
print(new.address)

# rbind() binds rows: append the rows of the second data frame to the first.
all.addresses <- rbind(addresses, new.address)

# Print a header.
cat("# # # The combined data frame\n")

# Print the result.
print(all.addresses)
melt and cast 
熔化和转换
R语言编程中最有趣的地方之一，是通过多个步骤改变数据的形状，直到得到所希望的形状。完成这类操作的函数是 melt() 和 cast()（由 reshape 包提供）。
我们使用 "MASS" 库中名为 ships 的数据集。
# Attach MASS, which provides the `ships` data set, and display the data.
library("MASS")
print(ships)

# Running the code above produces the following result:
   type year period service incidents
1     A   60     60     127         0
2     A   60     75      63         0
3     A   65     60    1095         3
4     A   65     75    1095         4
5     A   70     60    1512         6
.............
8     A   75     75    2244        11
9     B   60     60   44882        39
10    B   60     75   17176        29
11    B   65     60   28609        58
............
17    C   60     60    1179         1
18    C   60     75     552         1
19    C   65     60     781         0
............
融化数据
现在我们融化数据：保留 type 和 year 作为标识列，把除它们以外的所有列转换成多行。
# melt() is provided by the reshape package; it is neither in base R nor in
# MASS, so the package has to be attached before the call.
library(reshape)

# Melt the data: keep `type` and `year` as identifier columns and stack every
# other column into (variable, value) rows.
molten.ships <- melt(ships, id.vars = c("type", "year"))
print(molten.ships)

# Running the code above produces the following result:
    type year  variable value
1      A   60    period    60
2      A   60    period    75
3      A   65    period    60
4      A   65    period    75
............
9      B   60    period    60
10     B   60    period    75
11     B   65    period    60
12     B   65    period    75
13     B   70    period    60
...........
41     A   60   service   127
42     A   60   service    63
43     A   65   service  1095
...........
70     D   70   service  1208
71     D   75   service     0
72     D   75   service  2051
73     E   60   service    45
74     E   60   service     0
75     E   65   service   789
...........
101    C   70 incidents     6
102    C   70 incidents     2
103    C   75 incidents     0
104    C   75 incidents     1
105    D   60 incidents     0
106    D   60 incidents     0
...........
转换数据
我们可以把融化后的数据重新转换成一种新的形式：按每种船型 (type) 和每个年份 (year) 进行汇总。这通过 cast() 函数完成。
# cast() is provided by the reshape package, so make sure it is attached.
library(reshape)

# Re-cast the molten data: one row per (type, year) combination, one column
# per original variable, aggregating the stacked values with sum().
recasted.ship <- cast(
  molten.ships,
  type + year ~ variable,
  fun.aggregate = sum
)
print(recasted.ship)

# Running the code above produces the following result:
   type year period service incidents
1     A   60    135     190         0
2     A   65    135    2190         7
3     A   70    135    4865        24
4     A   75    135    2244        11
5     B   60    135   62058        68
6     B   65    135   48979       111
7     B   70    135   20163        56
8     B   75    135    7117        18
9     C   60    135    1731         2
10    C   65    135    1457         1
11    C   70    135    2731         8
12    C   75    135     274         1
13    D   60    135     356         0
14    D   65    135     480         0
15    D   70    135    1557        13
16    D   75    135    2051         4
17    E   60    135      45         0
18    E   65    135    1226        14
19    E   70    135    3318        17
20    E   75    135     542         1
读一个CSV文件
以下是 read.csv()函数的一个简单的例子，它读取在当前工作目录的可用的 CSV 文件：
# Load the CSV file from the current working directory and display it.
data <- read.csv("input.csv")
print(data)

# Reload the file and inspect its shape: is it a data frame, and how many
# columns and rows does it have?
data <- read.csv("input.csv")
print(is.data.frame(data))
print(ncol(data))
print(nrow(data))

# Details of the person(s) whose salary equals the maximum salary.
retval <- subset(data, salary == max(salary))
print(retval)
# --- Filtering rows of a CSV file ---------------------------------------------

# Employees of the IT department whose salary is greater than 600.
data <- read.csv("input.csv")
info <- subset(data, salary > 600 & dept == "IT")
print(info)

# Employees whose start date is later than 2014-01-01.
data <- read.csv("input.csv")
retval <- subset(data, as.Date(start_date) > as.Date("2014-01-01"))
print(retval)

# --- Reading XML --------------------------------------------------------------

# Load the packages required to read XML files.
library("XML")
library("methods")

# Convert the input XML file to a data frame.
xmldataframe <- xmlToDataFrame("input.xml")
print(xmldataframe)

# --- Reading JSON -------------------------------------------------------------

# Load the package required to read JSON files.
library("rjson")

# Parse the JSON file and print the parsed result.
result <- fromJSON(file = "input.json")
print(result)

# Parse the file again and convert the parsed JSON to a data frame.
result <- fromJSON(file = "input.json")
json_data_frame <- as.data.frame(result)
print(json_data_frame)

# --- Querying MySQL -----------------------------------------------------------

# dbConnect(), MySQL(), dbSendQuery() and fetch() need the MySQL driver
# package (which also attaches DBI); the original never loaded it.
library("RMySQL")

# Create a connection object to the MySQL database. We connect to the sample
# database named "sakila" that comes with the MySQL installation.
# Fixed: the argument was spelled `passWord`; argument names are
# case-sensitive, so the password was never passed as `password`.
mysqlconnection <- dbConnect(
  MySQL(),
  user = "root",
  password = "",
  dbname = "sakila",
  host = "localhost"
)

# List the tables available in this database.
dbListTables(mysqlconnection)

# Send the SQL query that selects all rows of the "actor" table.
result <- dbSendQuery(mysqlconnection, "select * from actor")

# Store the result in an R data frame; n = 5 limits the fetch to the first
# 5 rows.
# NOTE: the variable shares its name with base::data.frame(); calls to the
# function still resolve correctly, but a distinct name would be clearer.
data.frame <- fetch(result, n = 5)
# Fixed: this used to print the undefined name `data.fame`.
print(data.frame)

# Release the pending result set and close the connection so the script does
# not leak database resources.
dbClearResult(result)
dbDisconnect(mysqlconnection)

# --- Statistical mode ---------------------------------------------------------

# Return the element that occurs most often in `v`.
# unique() keeps values in order of first appearance and which.max() returns
# the first maximum, so ties go to the value that appears first in `v`.
getmode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

# Create the vector with numbers.
v <- c(2, 1, 2, 3, 1, 2, 3, 4, 1, 5, 5, 3, 2, 3)

# Calculate the mode using the user function.
result <- getmode(v)
print(result)

# Candlestick (K-line) chart:
# Candlestick chart of the monthly series for the ^SSEC symbol.
library("quantmod")

# Request ^SSEC quotes from the "yahoo" source, from 2015-1-1 up to today.
# The next statement relies on this call having created an object named SSEC;
# the call's own return value is kept in `sse`.
sse <- getSymbols(
  "^SSEC",
  from = "2015-1-1",
  to = Sys.Date(),
  src = "yahoo"
)

# Convert the series to monthly periodicity and show its last rows.
SSEC.m <- to.monthly(SSEC)
tail(SSEC.m)

# Draw the candlestick chart with the white theme.
candleChart(SSEC.m, theme = "white")