agGraphSearch, knitr, SPARQL
Last modified: 2022-12-08 23:17:19
Compiled: 2022-12-08 23:17:47
Once agGraphSearch is installed, it can be loaded by the following command.
# Clean the environment so the tutorial starts from a known state.
rm(list = ls())
# Install devtools only when it is not available yet, instead of
# re-installing it on every run.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
# (Re)install agGraphSearch from GitHub; force = TRUE reinstalls even when
# the installed version is already up to date.
devtools::install_github("kumeS/agGraphSearch", force = TRUE)
# Load packages
library(agGraphSearch)
library(magrittr)
In this tutorial, 510 Japanese terms obtained from the PoLyInfo database are used.
# Read the Japanese terms shipped with the package.
# mustWork = TRUE raises an error right away when the file is missing,
# instead of handing an empty path to read_csv().
fp <- system.file("extdata", "PoLyInfo_terms_Jpn_v01.csv",
                  package = "agGraphSearch", mustWork = TRUE)
wordsj <- data.frame(
  readr::read_csv(fp, col_names = TRUE, show_col_types = FALSE)
)
# Processing: flatten all columns into one vector of distinct, non-missing
# terms.
words <- unique(unlist(wordsj, use.names = FALSE))
words <- words[!is.na(words)]
# Show the head and the number of terms
head(words)
length(words)
The English terms obtained from the PoLyInfo database (463 terms in total) can be used in the same way; the code below is disabled by default.
# Disabled chunk: the body only runs if the condition is changed by hand.
if (FALSE) {
  # Read English terms
  fp <- system.file(
    "extdata", "PoLyInfo_terms_En_v01.csv",
    package = "agGraphSearch"
  )
  wordse <- data.frame(
    readr::read_csv(fp, col_names = TRUE, show_col_types = FALSE)
  )
  # Processing: flatten, de-duplicate, then drop missing values
  words <- unlist(wordse, use.names = FALSE)
  words <- unique(words)
  words <- words[!is.na(words)]
  # Show the head
  head(words)
}
Download the files below and save them to your local environment.
## Open the download pages in the browser, in this order:
##   1. class-related hierarchy dataset (common to English and Japanese)
##   2. the modified version of that dataset
##   3. Japanese labels
##   4. English labels
download_pages <- c(
  "https://drive.google.com/file/d/14sBKcU0vDZG18krsgXw7sVMkEG10muPd/view?usp=share_link",
  "https://drive.google.com/file/d/16Jh7YMnLWplyHMxdiA3e6t3cIIDy0kvz/view?usp=share_link",
  "https://drive.google.com/file/d/116zAB0Ycggd6Fu45CSSSzM9kDDF5xwlB/view?usp=share_link",
  "https://drive.google.com/file/d/1_BOq3jh3QNqHarlvCyFkp--0dUwGYCPR/view?usp=share_link"
)
for (page in download_pages) {
  browseURL(page)
}
Then move the downloaded files into your working directory. In this tutorial, I manually moved them to the “04_Wiki_PolyInfo_00” folder in the working directory.
# Source the helper script shipped with the package.
# mustWork = TRUE stops with a clear error when the script is not installed,
# instead of calling source() on an empty path.
fp <- system.file("script", "srachWikidataPolyInfo.R",
                  package = "agGraphSearch", mustWork = TRUE)
source(fp)
If you have already downloaded the files, you can skip this step.
# Multi-line comment-out: one-off preprocessing, disabled by default.
if (FALSE) {
  # Load the hierarchy dataset
  ClassHi <- readRDS("./04_Wiki_PolyInfo_00/Relation_P279_P31_df.Rds")

  # Check data
  head(ClassHi)
  # table(ClassHi$Property)

  # Remove loops, i.e. rows whose Subject equals their Object
  not_loop <- ClassHi$Subject != ClassHi$Object
  table(not_loop)
  ClassHi <- ClassHi[not_loop, ]
  saveRDS(ClassHi, "./04_Wiki_PolyInfo_00/ClassHiR.Rds")

  # Load the two label datasets
  Labels01 <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_rdfs_df.Rds")
  Labels02 <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_Altlabel_df.Rds")

  # Bind them, check the result and save it
  Labels <- rbind(Labels01, Labels02)
  head(Labels)
  saveRDS(Labels, "./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
}
Here Wikidata datasets were imported.
#Load the hierarchy dataset (100,193,046 triples).
#ClassHiR.Rds is the file the disabled preprocessing chunk writes after
#dropping the rows whose Subject equals their Object.
ClassHi <- readRDS("./04_Wiki_PolyInfo_00/ClassHiR.Rds")
#Load the Japanese label dataset.
#Label_ja_All_label_df.Rds is the row-bound combination of the rdfs and
#Altlabel label files written by the same preprocessing chunk.
Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
The input is the 510 Japanese terms.
# Show data
head(words)
head(Labels)
# Link the terms to Wikidata entities by complete lexical matching:
# keep label rows whose Object is one of the terms ...
LabelsR <- Labels[Labels$Object %in% words, ]
# ... and whose Subject also occurs as a Subject in the class hierarchy.
LabelsR <- LabelsR[LabelsR$Subject %in% unique(ClassHi$Subject), ]
# Check data
dim(LabelsR)
head(LabelsR)
# Renumber the rows. seq_len() also works when nothing matched, where
# 1:nrow() would produce c(1, 0) and fail.
rownames(LabelsR) <- seq_len(nrow(LabelsR))
dim(LabelsR)
# Obtain the distinct subject QIDs
QID.d <- data.frame(QID = unique(LabelsR$Subject))
This step executes SPARQL queries.
The input consists of 247 entities.
## Proxy setting, if you need one, e.g.:
# proxy_url = "http://wwwproxy.osakac.ac.jp:8080"; ProxySet(proxy_url)

# Set parameters: the QIDs to test against and how many there are.
# NOTE(review): SearchEntity() is called without arguments, so it presumably
# reads QID.d, ExcluQ and NumQ from the global environment -- confirm in
# "srachWikidataPolyInfo.R".
ExcluQ <- c(
  "Q101352", "Q16521", "Q11424", "Q6256",
  "Q12308941", "Q11879590", "Q5", "Q5327369",
  "Q577", "Q4167410", "Q3305213", "Q13406463",
  "Q35120", "Q3695082", "Q131257", "Q80071",
  "Q4830453", "Q482994", "Q20202269", "Q2996394"
)
NumQ <- length(ExcluQ)

# Run
QID.d <- SearchEntity()

# Remove hits: keep only the rows whose columns after the first sum to zero.
# drop = FALSE keeps the two-dimensional shape even when only one column is
# left, which the previous apply() call could not handle.
QID.d <- QID.d[rowSums(QID.d[, -1, drop = FALSE]) == 0, ]

# Show results
dim(QID.d)
head(QID.d)
# Source the helper script; mustWork = TRUE fails early if it is missing.
fp <- system.file("script", "srachWikidataPolyInfo.R",
                  package = "agGraphSearch", mustWork = TRUE)
source(fp)

# Set parameters: the property IDs to test against and how many there are.
# NOTE(review): SearchProperty() is called without arguments, so it presumably
# reads QID.d, ExcluP and NumP from the global environment -- confirm.
ExcluP <- c("P131", "P21", "P747", "P4743", "P577")
NumP <- length(ExcluP)

# Run
QID.d <- SearchProperty()

# Remove hits: keep only the rows whose columns after the first sum to zero.
# drop = FALSE keeps the two-dimensional shape even with one column left.
QID.d <- QID.d[rowSums(QID.d[, -1, drop = FALSE]) == 0, ]

# Show results
dim(QID.d)
head(QID.d)

# Save
# saveRDS(QID.d, "./04_Wiki_PolyInfo_XX/QID.d.Rds")
# Re-load
# QID.d <- readRDS("./04_Wiki_PolyInfo_XX/QID.d.Rds")

# Renumber the rows (seq_len() is safe for an empty result)
rownames(QID.d) <- seq_len(nrow(QID.d))

# Keep the first column as a plain vector. Selecting it directly also works
# when it is the only column, where the negative index -c(2:ncol(QID.d))
# would expand to -c(2, 1).
QID.d01 <- QID.d[, 1]

# The number of QIDs
length(QID.d01)
head(QID.d01)
Here the upper-level searches are performed.
# Show the rdfs:label rows of the remaining QIDs.
# NOTE(review): this data frame has the same name as the SearchEntity()
# function called earlier in the tutorial.
SearchEntity <- Labels[
  Labels$Property == "rdfs:label" & Labels$Subject %in% QID.d01,
]
head(SearchEntity)
dim(SearchEntity)

##################################################################
# Upper search up to 30 steps
##################################################################
# Need to source "srachWikidataPolyInfo.R" script

# Set parameters
QID.list <- QID.d01

# Run
QID.dat <- upperSearch(Depth = 30)
length(QID.dat)

# Save
# saveRDS(QID.dat, "./04_Wiki_PolyInfo_XX/QID.dat.Rds")
# saveRDS(QID.list, "./04_Wiki_PolyInfo_XX/QID.list.Rds")
# Re-load
# QID.dat <- readRDS("./04_Wiki_PolyInfo_XX/QID.dat.Rds")

# Need to source "srachWikidataPolyInfo.R" script
# Run
VisualizeGraph()
Check the results in the folder (04_Wiki_PolyInfo_01)
Here we calculate the lower-level search entities and then identify the common upper entities.
# QID.dat <- readRDS("./04_Wiki_PolyInfo_XX/QID.dat.Rds")
# Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
# str(QID.dat)

#################################################
# Calculate the common upper entities
#################################################
# Need to source "srachWikidataPolyInfo.R" script
QID.dat.res <- calcCU()
str(QID.dat.res)

# Select the QID.dat element of the result
QID.dat_02 <- QID.dat.res$QID.dat
head(QID.dat_02)
dim(QID.dat_02)
# Renumber the rows; seq_len() is safe for an empty table, where 1:nrow()
# would produce c(1, 0) and fail.
rownames(QID.dat_02) <- seq_len(nrow(QID.dat_02))

## Check NA: overall, then in the Subject and Object columns
table(is.na(QID.dat_02))
table(is.na(QID.dat_02$Subject))
table(is.na(QID.dat_02$Object))
#################################################
## Visualize entire network diagrams
#################################################
# Merge labels
QID.datR <- MergeWikiData(Data = QID.dat_02, Labels = Labels)
head(QID.datR)

# Drop rows whose subject equals its parentClass
QID.datR <- QID.datR[QID.datR$subject != QID.datR$parentClass, ]

# Append the ID to each label ("<label>.<id>")
QID.datR$subjectLabel <- paste0(
  QID.datR$subjectLabel, ".", QID.datR$subject
)
QID.datR$parentClassLabel <- paste0(
  QID.datR$parentClassLabel, ".", QID.datR$parentClass
)
head(QID.datR)

# Write the network as HTML into the working directory ...
html_path <- "./agVisNetwork_entire.html"
agGraphSearch:::agVisNetwork(
  Graph = QID.datR,
  NodeColorRandom = FALSE,
  Count = 2,
  StarSize = 10,
  Output = TRUE,
  Browse = FALSE,
  FilePath = html_path
)

# ... then move it into the result folder, replacing any previous file.
suppressMessages(
  filesstrings::file.move(
    files = html_path,
    destinations = "./04_Wiki_PolyInfo_02",
    overwrite = TRUE
  )
)
#################################################
# Count entities and triples
#################################################
# Entities: distinct values over the Subject and Object columns
E01 <- c(QID.dat_02$Subject, QID.dat_02$Object) %>%
  unique() %>%
  length()
E01
# [1] 763

# Triples: distinct values of the triple column
E02 <- sum(!duplicated(QID.dat_02$triple))
E02
# [1] 1292
#################################
# Calculate CU entities
#################################
# Select CUcounts
CUcounts00 <- QID.dat.res$CUcounts

# Count how often each value occurs and order the counts from high to low
cu_table <- table(CUcounts00)
cu_table <- cu_table[rev(order(cu_table))]
cu_table1 <- data.frame(
  parentClass = names(cu_table),
  Freq = as.numeric(cu_table),
  row.names = seq_along(cu_table),
  stringsAsFactors = FALSE
)
head(cu_table1)

# Attach the rdfs:label rows; entries without a label are kept (all.x)
cu_table2 <- merge(
  x = cu_table1,
  y = Labels[Labels$Property == "rdfs:label", ],
  by.x = "parentClass",
  by.y = "Subject",
  all.x = TRUE,
  all.y = FALSE,
  sort = FALSE
)

# Keep only the entries counted more than once
cu_table3 <- cu_table2[cu_table2$Freq > 1, ]
head(cu_table3)
tail(cu_table3)

# CU entities
dim(cu_table3)
# [1] 346 5

# Save
# saveRDS(cu_table3, "./04_Wiki_PolyInfo_XX/cu_table3.Rds")
# Re-load
# cu_table3 <- readRDS("./04_Wiki_PolyInfo_XX/cu_table3.Rds")
# QID.dat <- readRDS("./04_Wiki_PolyInfo_XX/QID.dat.Rds")
# Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
# str(QID.dat)
# Run pre-processing
# Need to source "srachWikidataPolyInfo.R" script
graph_Dat <- MeargeDatasets()
str(graph_Dat)

# Save
# saveRDS(graph_Dat, "./04_Wiki_PolyInfo_XX/graph_Dat.Rds")
# Re-load
# graph_Dat <- readRDS("./04_Wiki_PolyInfo_XX/graph_Dat.Rds")

# Calculation
SearchRnage <- agGraphAnalysis(
  graphList = graph_Dat$graph_List,
  search_List = graph_Dat$search_List,
  CU_List = graph_Dat$CU_List
)
# system("mv -f ./Results_* ./04_Wiki_PolyInfo_02")

# Levels: drop rows without a level, sort by level and renumber the rows.
# seq_len() is used for the renumbering because 1:nrow() produces c(1, 0)
# on an empty table.
SearchRnage00 <- SearchRnage[!is.na(SearchRnage$Levels), ]
SearchRnage00 <- SearchRnage00[order(SearchRnage00$Levels), ]
rownames(SearchRnage00) <- seq_len(nrow(SearchRnage00))
head(SearchRnage00)
tail(SearchRnage00)

# Merge labels; entities without an rdfs:label row are kept (all.x)
SearchRnage01 <- merge(
  SearchRnage00,
  Labels[Labels$Property == "rdfs:label", ],
  by.x = "CommonEntity", by.y = "Subject",
  all.x = TRUE, all.y = FALSE, sort = FALSE
)
SearchRnage01 <- SearchRnage01[order(SearchRnage01$Levels), ]
rownames(SearchRnage01) <- seq_len(nrow(SearchRnage01))
SearchRnage01$Property <- "rdfs:label"
# Where no label was found, fall back to the entity ID itself
no_label <- is.na(SearchRnage01$Object)
SearchRnage01$Object[no_label] <- SearchRnage01$CommonEntity[no_label]
head(SearchRnage01, n = 15)
tail(SearchRnage01, n = 15)

# Save
# saveRDS(SearchRnage01, "./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
# readr::write_excel_csv(SearchRnage01, "./04_Wiki_PolyInfo_XX/SearchRnage01.csv")
# Re-load
# SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
Here we search for the subordinate entities of the expanded common upper entities.
#Need to source "srachWikidataPolyInfo.R" script
#NOTE(review): subordinateSearch() is called without arguments and its return
#value is not stored, so it presumably works on objects in the global
#environment and/or writes files -- confirm in the sourced script.
subordinateSearch()
#Optional restart point: clear the workspace and reload SearchRnage01 from disk
#rm(list=ls())
#SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
#Show the size of the table used by the next step
dim(SearchRnage01)
#run
#NOTE(review): mergeECUsearch() is also called only for its side effects;
#the tutorial text lists ECU.list.Rds and TotalData.Rds as its outputs.
mergeECUsearch()
Outputs: ECU.list.Rds and TotalData.Rds
ECU.list: the number of unique triples (UniTripleNum) and the number of unique entities (UniEntityNum) for each ECU entity.
TotalData: the complete set of search results.
# ECU.list <- readRDS("./04_Wiki_PolyInfo_XX/ECU.list.Rds")
# SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
str(ECU.list)

# Result table with one row per level; every measure starts as NA.
CountDat <- data.frame(
  No = seq_len(max(ECU.list$Levels)),
  UniEntityNum = NA,
  UniTripleNum = NA,
  UniJpnLab = NA,
  AccEntityNum = NA,
  AccTripleNum = NA,
  AccJpnLab = NA,
  ExtractedNum = NA,
  RateAccEntity = NA,
  TotalAnswerNum = NA
)

## Evaluation
# Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
# OCR is not created anywhere else in this tutorial, so load the term list
# here when it is not already available.
if (!exists("OCR")) {
  OCR <- unlist(
    readr::read_csv("./高分子辞典OCR/高分子辞典OCR_2column_jpn_RRR.csv",
                    col_names = FALSE, show_col_types = FALSE),
    use.names = FALSE
  )
}
# Keep only the OCR terms that occur as a label
OCR.eva <- OCR[OCR %in% Labels$Object]
length(OCR.eva)

# Step 03
# Evaluation
# NOTE(review): EvaluationAll() takes no arguments, so it presumably reads
# CountDat, OCR.eva and related objects from the global environment -- confirm.
CountDat <- EvaluationAll()
CountDat
#Optional restart point: clear the workspace before reloading the inputs
#rm(list=ls())
#Reload the inputs from disk.
#NOTE: the saveRDS() calls that would write SearchRnage01.Rds and QID.d.Rds
#are commented out earlier in this tutorial, and nothing shown here writes
#CU_List.Rds, so these files must already exist.
SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
ClassHi <- readRDS("./04_Wiki_PolyInfo_00/ClassHiR.Rds")
#Only the QID column of the saved table is kept
QID.id <- readRDS("./04_Wiki_PolyInfo_XX/QID.d.Rds")$QID
CU_List <- readRDS("./04_Wiki_PolyInfo_XX/CU_List.Rds")
#Refinement: get the direction of the search entity
#NOTE(review): runRefinement() takes no arguments; presumably it uses the
#objects loaded above from the global environment -- confirm.
runRefinement()
# system(paste0("rm -rf ./04_Wiki_PolyInfo_06/*"))
# ECU.list <- readRDS("./04_Wiki_PolyInfo_XX/ECU.list.Rds")
head(ECU.list)
k <- max(ECU.list$Levels)

## Step 01: for every level, pool the per-entity refinement results into one
## CSV file, then write a copy without duplicated triples ("..._L<m>R.csv").
for (m in seq_len(k)) {
  message("m: ", m)

  # Row positions of the entries at this level. Positions are used (rather
  # than row names) because n indexes ECU.list by position below.
  d1 <- which(ECU.list$Levels == m)

  # The inner loop appends, so remove any file left over from an earlier run.
  # unlink() replaces the shell call "rm -rf" and works on every platform.
  level_csv <- paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", m, ".csv")
  unlink(level_csv)

  for (n in d1) {
    message("n: ", n)
    Dat <- readRDS(paste0(
      "./04_Wiki_PolyInfo_05/",
      formatC(n, width = 4, flag = "0"),
      "_L", ECU.list$Levels[n], "_",
      ECU.list$CommonEntity[n], "_refine.Rds"
    ))
    readr::write_csv(Dat, level_csv, append = TRUE, col_names = FALSE)
  }

  # Read the pooled triples back and keep the first occurrence of each one
  temp <- data.frame(
    readr::read_csv(level_csv, show_col_types = FALSE, col_names = FALSE)
  )
  colnames(temp) <- c("Subject", "Property", "Object")
  temp$triple <- paste0(temp$Subject, ".", temp$Property, ".", temp$Object)
  temp <- temp[!duplicated(temp$triple), ]

  readr::write_csv(
    temp,
    paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", m, "R.csv"),
    append = FALSE, col_names = FALSE
  )
}
# Remove the accumulated files of an earlier run, because the loop below
# appends. unlink() expands the wildcard itself, does nothing when no file
# matches, and replaces the platform-specific shell call "rm -rf".
unlink("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L*R_Acc.csv")

## Step 02: append the triples of level j to the accumulated file of every
## level l >= j, so that "..._L<l>R_Acc.csv" holds levels 1..l.
for (j in seq_len(k)) {
  message("j: ", j)
  temp <- data.frame(
    readr::read_csv(
      paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", j, "R.csv"),
      show_col_types = FALSE, col_names = FALSE
    )
  )
  for (l in j:k) {
    message("l: ", l)
    readr::write_csv(
      temp,
      paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", l, "R_Acc.csv"),
      append = TRUE, col_names = FALSE
    )
  }
}
# rm(list=ls())
# ECU.list <- readRDS("./04_Wiki_PolyInfo_XX/ECU.list.Rds")
head(ECU.list)
k <- max(ECU.list$Levels)

# Result table with one row per level; every measure starts as NA.
CountDatRefine <- data.frame(
  No = seq_len(k),
  UniEntityNum = NA,
  UniTripleNum = NA,
  UniJpnLab = NA,
  AccEntityNum = NA,
  AccTripleNum = NA,
  AccJpnLab = NA,
  ExtractedNum = NA,
  RateAccEntity = NA,
  TotalAnswerNum = NA
)

# Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
# OCR is not created anywhere else in this tutorial, so load the term list
# here when it is not already available.
if (!exists("OCR")) {
  OCR <- unlist(
    readr::read_csv("./高分子辞典OCR/高分子辞典OCR_2column_jpn_RRR.csv",
                    col_names = FALSE, show_col_types = FALSE),
    use.names = FALSE
  )
}
# Keep only the OCR terms that occur as a label
OCR.eva <- OCR[OCR %in% Labels$Object]
length(OCR.eva)

## Step 03: fill one row of CountDatRefine per level.
for (n in seq_len(k)) {
  message("n: ", n)

  # Uni: the triples of this level only
  temp <- data.frame(
    readr::read_csv(
      paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", n, "R.csv"),
      show_col_types = FALSE, col_names = FALSE
    )
  )
  colnames(temp) <- c("Subject", "Property", "Object", "triple")
  # Keep the first occurrence of every triple
  temp <- temp[!duplicated(temp$triple), ]

  message("Uni")
  UniEntity <- unique(c(temp$Subject, temp$Object))
  CountDatRefine$UniEntityNum[n] <- length(UniEntity)
  CountDatRefine$UniTripleNum[n] <- length(unique(temp$triple))
  # Jpn: number of these entities that have at least one row in Labels
  LabelsNum <- Labels[Labels$Subject %in% UniEntity, ]
  CountDatRefine$UniJpnLab[n] <- length(unique(LabelsNum$Subject))

  # Acc: the triples accumulated over levels 1..n
  temp <- data.frame(
    readr::read_csv(
      paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", n, "R_Acc.csv"),
      show_col_types = FALSE, col_names = FALSE
    )
  )
  colnames(temp) <- c("Subject", "Property", "Object", "triple")
  temp <- temp[!duplicated(temp$triple), ]
  readr::write_csv(
    temp,
    paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", n, "R_Acc_Uni.csv"),
    append = FALSE, col_names = FALSE
  )

  message("Acc")
  UniEntity <- unique(c(temp$Subject, temp$Object))
  CountDatRefine$AccEntityNum[n] <- length(UniEntity)
  CountDatRefine$AccTripleNum[n] <- length(unique(temp$triple))
  # Jpn
  LabelsNum <- Labels[Labels$Subject %in% UniEntity, ]
  CountDatRefine$AccJpnLab[n] <- length(unique(LabelsNum$Subject))

  message("OCR")
  # Distinct labels of the accumulated entities, compared with the OCR terms
  Eva00 <- unique(c(temp$Subject, temp$Object))
  Eva01 <- unique(Labels[Labels$Subject %in% Eva00, ]$Object)
  # Share of the OCR terms that were found, in percent
  Eva02 <- round(sum(Eva01 %in% OCR.eva) / length(OCR.eva) * 100, 3)
  CountDatRefine$ExtractedNum[n] <- sum(Eva01 %in% OCR.eva)
  CountDatRefine$RateAccEntity[n] <- Eva02
  CountDatRefine$TotalAnswerNum[n] <- length(OCR.eva)

  # Show the progress after every level
  print(CountDatRefine)
}
# Reload the label-merged table and show its size.
SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
dim(SearchRnage01)

# 01 Barplot: number of rows per level
level_counts <- table(SearchRnage01$Levels)
SearchRnage02 <- data.frame(
  No = names(level_counts),
  each_entity = as.numeric(level_counts)
)
head(SearchRnage02)

# ECU: extracted terms as a percentage of the accumulated entities and of
# the accumulated entities with a label, each also on the natural-log scale.
pct_of_entities <- CountDatRefine$ExtractedNum / CountDatRefine$AccEntityNum * 100
pct_of_jpn_labels <- CountDatRefine$ExtractedNum / CountDatRefine$AccJpnLab * 100
CountDatRefine$RateAccEntityPrec <- round(pct_of_entities, 5)
CountDatRefine$RateAccEntityPrecLog <- round(log(pct_of_entities), 5)
CountDatRefine$RateAccJpnPrec <- round(pct_of_jpn_labels, 5)
CountDatRefine$RateAccJpnPrecLog <- round(log(pct_of_jpn_labels), 5)

# Per-level counts and their running total, for the first k levels
CountDatRefine$each_entity <- SearchRnage02$each_entity[1:k]
CountDatRefine$sum_entity <- cumsum(SearchRnage02$each_entity)[1:k]

readr::write_csv(
  CountDatRefine,
  "./04_Wiki_PolyInfo_XX/CountDatRefine.csv",
  append = FALSE,
  col_names = TRUE
)
library(magrittr)
library(plotly)

# Save a plotly figure as "./Results_<yymmdd>_<tag>.html" in the working
# directory and then move it into `out_dir` (created when missing).
# file.rename() replaces the former shell call "mv", so this also works on
# systems without a POSIX shell and with paths that contain spaces.
# Returns the final path invisibly; warns when the move fails.
save_result_html <- function(fig, tag, out_dir = "./04_Wiki_PolyInfo_04") {
  if (!dir.exists(out_dir)) {
    dir.create(out_dir)
  }
  filename <- paste0("./Results_", format(Sys.time(), "%y%m%d"), "_", tag, ".html")
  htmlwidgets::saveWidget(plotly::as_widget(fig), file = filename)
  target <- file.path(out_dir, basename(filename))
  if (!file.rename(filename, target)) {
    warning("Could not move ", filename, " to ", out_dir, call. = FALSE)
  }
  invisible(target)
}

# Read the two result tables (all candidates / after refinement).
# NOTE(review): CountDat.csv is not written anywhere in this tutorial's
# visible code; the plots below expect it to contain the columns No,
# each_entity, UniEntityNum, AccEntityNum, RateAccEntity and RateAccJpnPrec.
CountDat <- data.frame(
  readr::read_csv("./04_Wiki_PolyInfo_XX/CountDat.csv",
                  show_col_types = FALSE, col_names = TRUE)
)
CountDat
CountDatRefine <- data.frame(
  readr::read_csv("./04_Wiki_PolyInfo_XX/CountDatRefine.csv",
                  show_col_types = FALSE, col_names = TRUE)
)
CountDatRefine

# Layout and marker settings shared by the plots below
plot_margin <- list(l = 50, r = 10, b = 50, t = 50, pad = 4)
nes_axis <- list(title = "NES", ticks = "outside", linewidth = 0,
                 tick0 = 1, dtick = 1)
nes_range_axis <- list(title = "NES range", ticks = "outside", linewidth = 1,
                       tick0 = 0, dtick = 1)
count_axis <- list(title = "Entity number", linewidth = 1)
count_axis_fixed <- list(title = "Entity number", linewidth = 1,
                         range = c(0, 70000000))
outline_thin <- list(color = "rgb(8,48,107)", width = 0.25)
outline_bold <- list(color = "rgb(8,48,107)", width = 0.5)
blue_bar <- list(color = "rgb(158,202,225)", line = outline_thin)
orange_bar <- list(color = "rgb(255,158,72)", line = outline_thin)
blue_point <- list(color = "rgb(158,202,225)", size = 8, line = outline_bold)
refine_marker <- list(color = "rgb(255,128,0)", size = 8, line = outline_bold)

######################################################
# Barplot 01
######################################################
fig <- plotly::plot_ly(CountDat, x = ~No, y = ~each_entity, type = "bar",
                       name = "", hovertemplate = "%{y}", marker = blue_bar)
fig <- fig %>% plotly::layout(title = "Number of ECU entities",
                              yaxis = count_axis,
                              xaxis = nes_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "01")

######################################################
# Barplot 02
######################################################
fig <- plotly::plot_ly(CountDat, x = ~No, y = ~UniEntityNum, type = "bar",
                       name = "", hovertemplate = "%{y}", marker = blue_bar)
fig <- fig %>% plotly::layout(title = "Number of concepts per ENS (with dup.)",
                              yaxis = count_axis_fixed,
                              xaxis = nes_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "02")

######################################################
# Barplot 03
######################################################
fig <- plotly::plot_ly(CountDat, x = ~No, y = ~AccEntityNum, type = "bar",
                       name = "", hovertemplate = "%{y}", marker = blue_bar)
fig <- fig %>% plotly::layout(title = "Unique cumulative number of concepts",
                              yaxis = count_axis_fixed,
                              xaxis = nes_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "03")

######################################################
# Plot 01: Recall
######################################################
# Put the refinement results next to the all-candidates results
CountDat$Refine.RateAccEntity <- CountDatRefine$RateAccEntity
CountDat$Refine.RateAccJpnPrec <- CountDatRefine$RateAccJpnPrec

fig <- plotly::plot_ly(CountDat, x = ~No, y = ~RateAccEntity,
                       type = "scatter", mode = "lines+markers",
                       name = "All candidates",
                       hovertemplate = "%{y}", marker = blue_point)
# NOTE(review): CountDat is passed positionally here, as in the original
# call; add_trace() has no positional data argument -- confirm it is needed.
fig <- fig %>% add_trace(CountDat, y = ~Refine.RateAccEntity,
                         name = "Refinement", mode = "lines+markers",
                         marker = refine_marker)
fig <- fig %>% plotly::layout(title = "",
                              yaxis = list(title = "Recall rate (%)",
                                           linewidth = 1, tick0 = 0,
                                           dtick = 10, range = c(0, 80),
                                           zeroline = FALSE),
                              xaxis = nes_range_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "04R")

######################################################
# Plot 02: Precision
######################################################
fig <- plotly::plot_ly(CountDat, x = ~No, y = ~RateAccJpnPrec,
                       type = "scatter", mode = "lines+markers",
                       name = "All candidates",
                       hovertemplate = "%{y}", marker = blue_point)
# NOTE(review): positional CountDat kept as in the original call (see above).
fig <- fig %>% add_trace(CountDat, y = ~Refine.RateAccJpnPrec,
                         name = "Refinement", mode = "lines+markers",
                         marker = refine_marker)
fig <- fig %>% plotly::layout(title = "",
                              yaxis = list(title = "Precision rate (%)",
                                           linewidth = 1, tick0 = 0,
                                           dtick = 1, type = "log"),
                              xaxis = nes_range_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "05R")

######################################################
# CountDatRefine: Barplot 02
######################################################
fig <- plotly::plot_ly(CountDatRefine, x = ~No, y = ~UniEntityNum,
                       type = "bar", name = "",
                       hovertemplate = "%{y}", marker = orange_bar)
fig <- fig %>% plotly::layout(title = "Number of concepts per ENS (with dup.)",
                              yaxis = count_axis_fixed,
                              xaxis = nes_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "06R")

######################################################
# CountDatRefine: Barplot 03
######################################################
fig <- plotly::plot_ly(CountDatRefine, x = ~No, y = ~AccEntityNum,
                       type = "bar", name = "",
                       hovertemplate = "%{y}", marker = orange_bar)
fig <- fig %>% plotly::layout(title = "Unique cumulative number of concepts",
                              yaxis = count_axis_fixed,
                              xaxis = nes_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "07R")

######################################################
# Barplot 04
######################################################
# Put the refinement counts next to the all-candidates counts
CountDat$Refine.each_entity <- CountDatRefine$each_entity
CountDat$Refine.UniEntityNum <- CountDatRefine$UniEntityNum
CountDat$Refine.AccEntityNum <- CountDatRefine$AccEntityNum

fig <- plotly::plot_ly(CountDat, x = ~No, y = ~each_entity, type = "bar",
                       name = "All candidates",
                       hovertemplate = "%{y}", marker = blue_bar)
fig <- fig %>% add_trace(y = ~Refine.each_entity, name = "Refinement",
                         type = "bar", marker = refine_marker)
fig <- fig %>% plotly::layout(title = "Number of ECU entities",
                              bargap = 0.1, bargroupgap = 0.1,
                              yaxis = count_axis,
                              xaxis = nes_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "01R")

######################################################
# Barplot 05
######################################################
fig <- plotly::plot_ly(CountDat, x = ~No, y = ~UniEntityNum, type = "bar",
                       name = "All candidates",
                       hovertemplate = "%{y}", marker = blue_bar)
fig <- fig %>% add_trace(y = ~Refine.UniEntityNum, name = "Refinement",
                         type = "bar", marker = refine_marker)
fig <- fig %>% plotly::layout(title = "Number of concepts per ENS (with dup.)",
                              bargap = 0.1, bargroupgap = 0.1,
                              yaxis = count_axis_fixed,
                              xaxis = nes_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "02R")

######################################################
# Barplot 06
######################################################
fig <- plotly::plot_ly(CountDat, x = ~No, y = ~AccEntityNum, type = "bar",
                       name = "All candidates",
                       hovertemplate = "%{y}", marker = blue_bar)
fig <- fig %>% add_trace(y = ~Refine.AccEntityNum, name = "Refinement",
                         type = "bar", marker = refine_marker)
fig <- fig %>% plotly::layout(title = "Unique cumulative number of concepts",
                              bargap = 0.1, bargroupgap = 0.1,
                              yaxis = count_axis_fixed,
                              xaxis = nes_axis,
                              margin = plot_margin)
fig
save_result_html(fig, "03R")
In this step, the results extracted from Wikidata are converted to RDF.
Unnecessary strings were removed.
#Optional reload of the inputs from disk
#QID.list <- readRDS("./04_Wiki_PolyInfo_XX/QID.list.Rds")
#Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
#Read All candidates
#Writes to the .nt file given as FileName.
#NOTE(review): the meaning of N = 7 is not visible here -- confirm in
#"srachWikidataPolyInfo.R".
convRDF_all(N = 7,
FileName="./04_Wiki_PolyInfo_07/RDF_from_Wikidat_all_v02.nt")
#Read All data after refinement
#Same call shape as above, for the refined results; output goes to a
#separate .nt file.
convRDF_refinement(N = 7,
FileName="./04_Wiki_PolyInfo_07/RDF_from_Wikidat_refine_v02.nt")
## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Big Sur 11.5.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] BiocStyle_2.26.0
##
## loaded via a namespace (and not attached):
## [1] bookdown_0.30 digest_0.6.30 R6_2.5.1
## [4] jsonlite_1.8.3 magrittr_2.0.3 evaluate_0.18
## [7] stringi_1.7.8 cachem_1.0.6 rlang_1.0.6
## [10] cli_3.4.1 rstudioapi_0.14 jquerylib_0.1.4
## [13] bslib_0.4.1 rmarkdown_2.18 tools_4.2.2
## [16] stringr_1.4.1 xfun_0.35 yaml_2.3.6
## [19] fastmap_1.1.0 compiler_4.2.2 BiocManager_1.30.19
## [22] htmltools_0.5.3 knitr_1.41 sass_0.4.4