# Point the session at the project folder, then print the directory in use.
# setwd() has no default for its `dir` argument, so a bare setwd() stops with
# an error.
# NOTE(review): "E:/RGEB" is inferred from the absolute path passed to load()
# further down in this script -- replace it with your own project folder.
setwd("E:/RGEB")
getwd()
# Ten diabetes-type labels stored as a character vector.
# The name and the assignment have to form one statement: with a line break
# after `diabetes`, R evaluates the bare name first and then cannot parse the
# line that starts with `=`.
diabetes <- c(
  "Type1", "Type2", "Type1", "Type2", "Type2",
  "Type2", "Type1", "Type1", "Type2", "Type1"
)
# Install the tidyverse only when it is missing. The package name must be a
# quoted string spelled "tidyverse"; a bare, misspelled symbol is looked up as
# a variable and fails.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(dplyr)

# Demonstrate detach(), then put stats back where it was on the search path:
# sd() and shapiro.test(), both used further down, live in stats and cannot be
# found while the package is detached.
stats_pos <- match("package:stats", search())
detach("package:stats", unload = TRUE)
library(stats, pos = stats_pos)
# Explore the built-in USArrests data frame: first rows, per-column summary,
# standard deviation of the Murder column, and the spreadsheet-style viewer.
data("USArrests")
head(USArrests)
summary(USArrests)
sd(USArrests$Murder)
View(USArrests)

# UCBAdmissions is already a contingency table, so table() applied to it
# tabulates the cell counts themselves; str() shows its structure.
table(UCBAdmissions)
str(UCBAdmissions)

# gss_cat ships with the forcats package, which this script never attaches, so
# it is referenced through its namespace.
levels(forcats::gss_cat$rincome)

# Shapiro-Wilk normality test on the Murder column.
shapiro.test(USArrests$Murder)
# Read the Millennium Cohort Study file. A .dta file is a Stata data set, so it
# is read with haven::read_dta(); load() only restores objects written by
# save() and cannot open it, so that call has been removed.
library(haven)
mcs <- read_dta("2015 Millennium Cohort Study/mcs.dta")

# Mapping from the codes stored in `mths` to their agreement labels.
agreement_labels <- c(
  "1" = "Strongly Disagree",
  "2" = "Disagree",
  "3" = "Agree",
  "4" = "Strongly Agree"
)

# Convert the codes to a factor, then relabel its levels. recode() needs the
# vector to recode as its first argument; `!!!` splices the named vector in as
# old = new pairs.
mcs$math <- as.factor(mcs$mths)
mcs$math <- recode(mcs$math, !!!agreement_labels)

# NOTE(review): `science` is built from the already relabelled `math`, so none
# of the keys "1".."4" match any more and the result is a copy of `math`.
# Presumably a separate science item was meant -- confirm the source column.
mcs$science <- recode(mcs$math, !!!agreement_labels)
# Derive a birth year by subtracting age from 2014.
gfk_cleaned_eul$birthyear <- 2014 - gfk_cleaned_eul$age

# Bin birth years into generation bands. cut() builds right-closed intervals,
# so each break must be the LAST year of its band. The previous breaks
# (1965, 1985, 1997) put the years 1965, 1985 and 1997 one band too early
# compared with what the labels describe.
gfk_cleaned_eul$birthyear_cat <- cut(
  gfk_cleaned_eul$birthyear,
  breaks = c(-Inf, 1945, 1964, 1984, 1996, Inf),
  labels = c("(-Inf,1945]", "(1945,1964]", "(1965,1984]", "(1985,1996]", "(1997,Inf]")
)

# Replace the interval labels with generation names.
gfk_cleaned_eul$birthyear_cat <- recode(
  gfk_cleaned_eul$birthyear_cat,
  "(-Inf,1945]" = "born in 1945 or before",
  "(1945,1964]" = "Boomers",
  "(1965,1984]" = "GenX",
  "(1985,1996]" = "Millenium",
  "(1997,Inf]" = "GenZ"
)
# Read the Excel export. read_excel() belongs to readxl, which this script
# never attaches, so it is called through its namespace.
gfk_excel_version <- readxl::read_excel("gfk_excel_version.xls")

# Treat the answer "Refused" as missing.
gfk_excel_version$hhincome <- na_if(gfk_excel_version$hhincome, "Refused")

# Turn household income into a factor whose levels run from the highest band to
# the lowest. Any value that is not listed here becomes NA.
# NOTE(review): "25 000 - 29 999" uses spaces where every other band uses
# commas -- confirm it matches the spelling in the data.
hhincome_levels <- c(
  "Over 200,000", "150,000 - 199,999", "100,000 - 149,999",
  "95,000 - 99,999", "90,000 - 94,999", "85,000 - 89,999",
  "80,000 - 84,999", "75,000 - 79,999", "70,000 - 74,999",
  "65,000 - 69,999", "60,000 - 64,999", "55,000 - 59,999",
  "50,000 - 54,999", "45,000 - 49,999", "40,000 - 44,999",
  "35,000 - 39,999", "30,000 - 34,999", "25 000 - 29 999",
  "20,000 - 24,999", "15,000 - 19,999", "10,000 - 14,999",
  "5,000 - 10,000", "Under 5,000"
)
gfk_excel_version$hhincome <- factor(gfk_excel_version$hhincome, levels = hhincome_levels)
nlevels(gfk_excel_version$hhincome)
# Smallest birth year in the data. na.rm = TRUE keeps a single missing value
# from turning the whole result into NA.
min_birthyear <- min(gfk_cleaned_eul$birthyear, na.rm = TRUE)
print(min_birthyear)

# Inspect one student table, stack both tables row-wise, then average BMI over
# the combined rows (again ignoring missing values).
View(SSE_students_data)
cuhksz_students_h <- rbind(SME_students_data, SSE_students_data)
mean(cuhksz_students_h$BMI, na.rm = TRUE)
# Drop the 7th column. drop = FALSE keeps the result a data frame even when
# only one column remains.
Survey_GE_class_choice_2 <- Survey_GE_class_choice_2[, -7, drop = FALSE]

# Join the two employment tables on their shared "ID" column.
CUHK_employement_1 <- merge(CUHKSZ_employment_survey_1, CUHKSZ_employment_survey_1b, by = "ID")

# NOTE(review): this assigns CUHKSZ_employment_survey_2 to its own name, which
# leaves the object unchanged -- confirm which target name was intended.
new_name <- "CUHKSZ_employment_survey_2"
assign(new_name, CUHKSZ_employment_survey_2)

# Remove the last column.
# NOTE(review): CUHK_employement_2 is not created in the visible part of this
# script -- verify where it comes from.
CUHK_employement_2 <- CUHK_employement_2[, -ncol(CUHK_employement_2), drop = FALSE]

# Replace one entry in a vector of column names.
# NOTE(review): column_names and new_column_name are not defined in the visible
# part of this script; they must exist before these lines run.
column_index_to_change <- which(column_names == "Month_salary_22.x")
column_names[column_index_to_change] <- new_column_name
# Open the documentation page for recode().
help(recode)

# Band the weekly coffee count. cut() intervals are right-closed and, by
# default, exclude the lowest break, so a value of exactly 0 used to become NA
# despite the "0_2" label; include.lowest = TRUE fixes that. The top break is
# Inf so that values above 11 land in "more_than_7" instead of NA.
coffeenew$newcofnumcat <- cut(
  coffeenew$nrb_coffee_week,
  breaks = c(0, 2, 5, 7, Inf),
  labels = c("0_2", "2_5", "5_7", "more_than_7"),
  include.lowest = TRUE
)
Answer:
# Setting and getting working directory
# setwd() has no default for its `dir` argument, so a bare setwd() stops with
# an error.
# NOTE(review): "E:/RGEB" is inferred from the absolute path passed to load()
# further down -- replace it with your own project folder.
setwd("E:/RGEB")
# function_name: "setwd"; function: "set the working directory"; format: "setwd(...)"
getwd()
# function_name: "getwd"; function: "return the current working directory"; format: "getwd()"
# Building the character vector 'diabetes' from the type number of each entry
diabetes <- paste0("Type", c(1, 2, 1, 2, 2, 2, 1, 1, 2, 1))
# function_name: "c"; function: "combine elements into a vector"; format: "new_vector <- c(...)"
# function_name: "paste0"; function: "concatenate strings without a separator"; format: "new_vector <- paste0(...)"
# Installing and loading necessary packages
# Install only when the package is missing, so re-running the script does not
# download it again every time.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
# function_name: "install.packages"; function: "install specified packages"; format: "install.packages(...)"
library(dplyr)
# function_name: "library"; function: "load specified library"; format: "library(...)"
# sd() and shapiro.test(), both used further down, live in stats and cannot be
# found while it is detached, so the package is re-attached at its previous
# search-path position right after the detach() demonstration.
stats_pos <- match("package:stats", search())
detach("package:stats", unload = TRUE)
# function_name: "detach"; function: "detach specified package"; format: "detach(..., unload = TRUE)"
library(stats, pos = stats_pos)
# Loading the 'USArrests' dataset and performing basic operations
data("USArrests")
# function_name: "data"; function: "load specified dataset"; format: "data(...)"
head(USArrests)
# function_name: "head"; function: "display the first few rows of a dataset"; format: "head(...)"
summary(USArrests)
# function_name: "summary"; function: "display summary statistics of a dataset"; format: "summary(...)"
sd(USArrests$Murder)
# function_name: "sd"; function: "calculate standard deviation"; format: "new_variable <- sd(...)"
View(USArrests)
# function_name: "View"; function: "open a viewer for a dataset"; format: "View(...)"
# Exploring data in 'UCBAdmissions'
# UCBAdmissions is already a contingency table, so table() applied to it
# tabulates the cell counts themselves.
table(UCBAdmissions)
# function_name: "table"; function: "create a table of counts"; format: "table(...)"
# Checking levels in a categorical variable in 'gss_cat'
# gss_cat ships with the forcats package, which this script never attaches, so
# it is referenced through its namespace.
levels(forcats::gss_cat$rincome)
# function_name: "levels"; function: "get the levels of a factor variable"; format: "levels(...)"
# Conducting a Shapiro-Wilk test on 'Murder' column in 'USArrests'
shapiro.test(USArrests$Murder)
# function_name: "shapiro.test"; function: "conduct the Shapiro-Wilk test"; format: "shapiro.test(...)"
# Loading and manipulating data from the '2015 Millennium Cohort Study'
# A .dta file is a Stata data set. load() only restores R objects written by
# save() and cannot open it, so the load() call has been removed and the file
# is read with haven::read_dta() below.
# function_name: "load"; function: "restore R objects saved with save() (.RData/.rda files)"; format: "load(...)"
library(haven)
# function_name: "library"; function: "load specified library"; format: "library(...)"
mcs <- read_dta("2015 Millennium Cohort Study/mcs.dta")
# function_name: "read_dta"; function: "read data from a Stata file"; format: "new_dataset <- read_dta(...)"
# Creating a new variable 'birthyear' and categorizing it
gfk_cleaned_eul$birthyear <- 2014 - gfk_cleaned_eul$age
# operator: "-"; function: "subtract one vector from another, element by element"; format: "new_variable <- ... - ..."
# cut() builds right-closed intervals, so each break must be the LAST year of
# its band. The previous breaks (1965, 1985, 1997) put the years 1965, 1985 and
# 1997 one band too early compared with what the labels describe.
gfk_cleaned_eul$birthyear_cat <- cut(
  gfk_cleaned_eul$birthyear,
  breaks = c(-Inf, 1945, 1964, 1984, 1996, Inf),
  labels = c("(-Inf,1945]", "(1945,1964]", "(1965,1984]", "(1985,1996]", "(1997,Inf]")
)
# function_name: "cut"; function: "create categorical variable by cutting a numeric variable"; format: "new_variable <- cut(...)"
gfk_cleaned_eul$birthyear_cat <- recode(
  gfk_cleaned_eul$birthyear_cat,
  "(-Inf,1945]" = "born in 1945 or before",
  "(1945,1964]" = "Boomers",
  "(1965,1984]" = "GenX",
  "(1985,1996]" = "Millenium",
  "(1997,Inf]" = "GenZ"
)
# function_name: "recode"; function: "recode levels of a factor variable"; format: "new_variable <- recode(...)"
# Reading an Excel file and processing 'hhincome' variable
# read_excel() belongs to readxl, which this script never attaches, so it is
# called through its namespace.
gfk_excel_version <- readxl::read_excel("gfk_excel_version.xls")
# function_name: "read_excel"; function: "read data from an Excel file"; format: "new_dataset <- read_excel(...)"
gfk_excel_version$hhincome <- na_if(gfk_excel_version$hhincome, "Refused")
# function_name: "na_if"; function: "replace specific values with NA"; format: "new_variable <- na_if(...)"
# The full list of levels is spelled out: a literal `...` inside c() is not a
# placeholder R understands and stops with an error at top level.
# NOTE(review): "25 000 - 29 999" uses spaces where every other band uses
# commas -- confirm it matches the spelling in the data.
hhincome_levels <- c(
  "Over 200,000", "150,000 - 199,999", "100,000 - 149,999",
  "95,000 - 99,999", "90,000 - 94,999", "85,000 - 89,999",
  "80,000 - 84,999", "75,000 - 79,999", "70,000 - 74,999",
  "65,000 - 69,999", "60,000 - 64,999", "55,000 - 59,999",
  "50,000 - 54,999", "45,000 - 49,999", "40,000 - 44,999",
  "35,000 - 39,999", "30,000 - 34,999", "25 000 - 29 999",
  "20,000 - 24,999", "15,000 - 19,999", "10,000 - 14,999",
  "5,000 - 10,000", "Under 5,000"
)
gfk_excel_version$hhincome <- factor(gfk_excel_version$hhincome, levels = hhincome_levels)
# function_name: "factor"; function: "convert a variable to a factor with specified levels"; format: "new_variable <- factor(...)"
# Merging and cleaning datasets
CUHK_employement_1 <- merge(CUHKSZ_employment_survey_1, CUHKSZ_employment_survey_1b, by = "ID")
# function_name: "merge"; function: "merge datasets by a common variable"; format: "new_dataset <- merge(...)"
new_name <- "CUHKSZ_employment_survey_2"
# Plain assignment: stores the target object name as a string for assign() below.
assign(new_name, CUHKSZ_employment_survey_2)
# function_name: "assign"; function: "assign a value to a variable whose name is given as a string"; format: "assign(..., ...)"
# NOTE(review): this assigns CUHKSZ_employment_survey_2 to its own name, which
# leaves the object unchanged -- confirm which target name was intended.
# NOTE(review): CUHK_employement_2 is not created in the visible part of this
# script -- verify where it comes from.
# drop = FALSE keeps the result a data frame even when only one column remains.
CUHK_employement_2 <- CUHK_employement_2[, -ncol(CUHK_employement_2), drop = FALSE]
# operator: "["; function: "remove the last column by negative index"; format: "new_dataset <- old_dataset[, -ncol(old_dataset), drop = FALSE]"
# Replacing one entry in a vector of column names
# NOTE(review): column_names and new_column_name are not defined in the visible
# part of this script; they must exist before these lines run.
column_index_to_change <- which(column_names == "Month_salary_22.x")
# function_name: "which"; function: "get the index of elements that satisfy a condition"; format: "new_index <- which(...)"
column_names[column_index_to_change] <- new_column_name
# operator: "[<-"; function: "replace specific values"; format: "new_vector <- old_vector; new_vector[index] <- new_value"
help(recode)
# function_name: "help"; function: "display help documentation"; format: "help(...)"
# Manipulating data in 'coffeenew'
# cut() intervals are right-closed and, by default, exclude the lowest break,
# so a value of exactly 0 used to become NA despite the "0_2" label;
# include.lowest = TRUE fixes that. The top break is Inf so that values above
# 11 land in "more_than_7" instead of NA.
coffeenew$newcofnumcat <- cut(
  coffeenew$nrb_coffee_week,
  breaks = c(0, 2, 5, 7, Inf),
  labels = c("0_2", "2_5", "5_7", "more_than_7"),
  include.lowest = TRUE
)
# function_name: "cut"; function: "create categorical variable by cutting a numeric variable"; format: "new_variable <- cut(...)"
Question: What R function is used to set the working directory?
setdir()
setwd()
setworking()
workdir()
Question: Which function installs specified R packages?
load.packages()
install.library()
install.packages()
library.install()
Question: What function is used to load a specified library in R?
load()
library()
load.library()
import.library()
Question: In R, which function is used to calculate the standard deviation of a numeric variable?
calculate_sd()
std_dev()
sd()
variance()
Question: What function opens a viewer for a dataset in R?
explore()
browse()
view()
View()
Question: In R, what function creates a table of counts for categorical data?
tabulate()
table()
count()
crosstab()
Question: Which function is used to get the levels of a factor variable in R?
getlevels()
factorlevels()
levels()
factor_levels()
Question: What R function is used to conduct the Shapiro-Wilk test?
shapiro()
wilks.test()
shapiro.test()
test.shapiro()
Question: Which function reads data from a Stata file in R?
read_spss()
read_stata()
read_sas()
read_dta()
Question: In R, what function is used to create a categorical variable by cutting a numeric variable into bins?
bin()
create_cat()
cut()
category()
11. Question: Which function is used to merge datasets by a common variable in R?
combine()
merge()
join()
concat()
12. Question: What function is used to replace specific values with NA in R?
replace_na()
na_replace()
na_if()
replace_with_na()
13. Question: In R, which function is used to convert a variable to a factor with specified levels?
convert_factor()
to_factor()
factorize()
factor()
14. Question: What R function is used to remove specified columns from a dataset?
remove_cols()
subset()
drop_cols()
exclude()
15. Question: Which function in R is used to get the index of elements that satisfy a condition?
find()
locate()
index()
which()
16. Question: In R, what function is used to create a categorical variable by cutting a numeric variable into bins with labels?
categorize()
label_cut()
create_category()
cut()
17. Question: Which function in R displays help documentation for a specified function?
help()
info()
documentation()
assist()
18. Question: What function is used to replace specific values with new values in R?
replace_values()
change()
recode()
modify()
19. Question: In R, what function is used to create a new categorical variable based on the values of a numeric variable?
category_from_numeric()
create_categorical()
label_numeric()
cut()
20. Question: Which function in R is used to replace specific values with new values in a dataset?
replace()
modify()
recalculate()
recode()
Answers for the quiz
Answer: b) setwd()
setwd("/path/to/your/directory")
Answer: c) install.packages()
install.packages("tidyverse")
Answer: b) library()
library(dplyr)
Answer: c) sd()
standard_deviation <- sd(data$variable)
Answer: d) View()
View(data)
Answer: b) table()
table(factor_data)
Answer: c) levels()
factor_levels <- levels(factor_data)
Answer: c) shapiro.test()
shapiro.test(data$numeric_variable)
Answer: d) read_dta()
dataset <- read_dta("file.dta")
Answer: c) cut()
Example: cut_variable <- cut(data$numeric_variable, breaks = c(0, 25, 50, 75, 100))
Answer: b) merge()
merged_data <- merge(data1, data2, by="common_variable")
Answer: c) na_if()
data$variable <- na_if(data$variable, "specific_value")
Answer: d) factor()
data$variable <- factor(data$variable, levels = c("level1", "level2", "level3"))
Answer: b) subset()
new_data <- subset(data, select = -c(column_to_remove))
Answer: d) which()
index <- which(data$condition == TRUE)
Answer: d) cut()
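category_variable <- cut(data$numeric_variable, breaks = c(0, 25, 50, 75, 100), labels = c("Low", "Medium", "High", "Very High"))
(Note: five break points define four intervals, so `labels` needs four entries. If only three labels were supplied, the last interval (75, 100] would have no label and cut() would stop with an error, so supply one label per interval.)
Answer: a) help()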
help(function_name)
Answer: c) recode()
data$variable <- recode(data$variable, "old_value" = "new_value")
Answer: d) cut()
category_variable <- cut(data$numeric_variable, breaks = c(0, 25, 50, 75, 100), labels = c("Low", "Medium", "High", "Very High"))
Answer: a) replace()
data$variable <- replace(data$variable, data$condition, new_value)