Contents

Last modified: 2022-12-08 23:17:19
Compiled: 2022-12-08 23:17:47

1 Getting started

1.1 Requirements

  • macOSX 10.15 with 3.6GHz Intel Core i9 or similar specs
  • RAM: 32GB
  • Available storage: > 20GB

1.2 Installation of agGraphSearch

Once agGraphSearch is installed, it can be loaded by the following command.

# Clean the environment (removes every object from the global workspace)
rm(list=ls())

# Install devtools only when it is missing, so re-running the tutorial does
# not reinstall it each time. agGraphSearch itself is always reinstalled from
# GitHub because of force = TRUE.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("kumeS/agGraphSearch", force = TRUE)

# Load packages
library(agGraphSearch)
library(magrittr)

2 Read the search terms obtained from the PolyInfo database

In this tutorial, the Japanese terms obtained from the PolyInfo database are used; there are 510 terms in total.

# Read the Japanese terms shipped with the agGraphSearch package
fp <- system.file("extdata", "PoLyInfo_terms_Jpn_v01.csv", package = "agGraphSearch")
wordsj <- data.frame(readr::read_csv(fp, col_names = TRUE, show_col_types = FALSE))

# Flatten every column into one vector of distinct, non-missing terms
words <- unique(unlist(wordsj, use.names = FALSE))
words <- words[!is.na(words)]

# Show the first terms and the number of terms
head(words)
length(words)

The English terms obtained from the PolyInfo database (463 terms in total) can be used in the same way, as shown below.

# Disabled alternative: run the body manually to use the English terms.
# Note that it overwrites `words`.
if (FALSE) {
  # Read the English terms shipped with the agGraphSearch package
  fp <- system.file("extdata", "PoLyInfo_terms_En_v01.csv", package = "agGraphSearch")
  wordse <- data.frame(readr::read_csv(fp, col_names = TRUE, show_col_types = FALSE))

  # Flatten every column into one vector of distinct, non-missing terms
  words <- unique(unlist(wordse, use.names = FALSE))
  words <- words[!is.na(words)]

  # Show the first terms
  head(words)
}

3 Wikidata datasets (25th Sep 2021)

3.1 Download the Wikidata datasets from Google Drive. (Updated on 08-DEC-2022)

Download the files below and save them in your local environment.

# Google Drive pages of the Wikidata files, opened one after another in the
# default browser (same order as listed).
wikidata_urls <- c(
  # Class-related hierarchy dataset (common to English and Japanese)
  "https://drive.google.com/file/d/14sBKcU0vDZG18krsgXw7sVMkEG10muPd/view?usp=share_link",
  # The modified version of the hierarchy dataset
  "https://drive.google.com/file/d/16Jh7YMnLWplyHMxdiA3e6t3cIIDy0kvz/view?usp=share_link",
  # Japanese labels
  "https://drive.google.com/file/d/116zAB0Ycggd6Fu45CSSSzM9kDDF5xwlB/view?usp=share_link",
  # English labels
  "https://drive.google.com/file/d/1_BOq3jh3QNqHarlvCyFkp--0dUwGYCPR/view?usp=share_link"
)
for (u in wikidata_urls) {
  browseURL(u)
}

Then move the downloaded files into your working directory. In this tutorial, the files were moved manually into the “04_Wiki_PolyInfo_00” folder in the working directory.

3.2 Source scripts

# Locate the helper script bundled with agGraphSearch and load its functions
fp <- system.file("script", "srachWikidataPolyInfo.R", package = "agGraphSearch")
# system.file() returns "" when the package or file is missing; stop with a
# clear message instead of letting source("") fail obscurely.
if (!nzchar(fp)) {
  stop("srachWikidataPolyInfo.R was not found in the agGraphSearch package",
       call. = FALSE)
}
source(fp)

3.3 Import Wikidata datasets

If you downloaded the files listed above, you can skip this step.

# Optional pre-processing. The block is disabled; run its body manually to
# rebuild ClassHiR.Rds and Label_ja_All_label_df.Rds from the raw files.
if (FALSE) {
  # Load the hierarchy dataset
  ClassHi <- readRDS("./04_Wiki_PolyInfo_00/Relation_P279_P31_df.Rds")

  # Check data
  head(ClassHi)
  #table(ClassHi$Property)

  # Remove self-loops (rows whose Subject equals their Object)
  table(ClassHi$Subject != ClassHi$Object)
  ClassHi <- ClassHi[ClassHi$Subject != ClassHi$Object, ]
  saveRDS(ClassHi, "./04_Wiki_PolyInfo_00/ClassHiR.Rds")

  # Load the two label datasets
  Labels01 <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_rdfs_df.Rds")
  Labels02 <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_Altlabel_df.Rds")

  # Bind them, check the result and save the combined labels
  Labels <- rbind(Labels01, Labels02)
  head(Labels)
  saveRDS(Labels, "./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
}

Here Wikidata datasets were imported.

# Class hierarchy with self-loops already removed (100,193,046 triples)
ClassHi <- readRDS(file = "./04_Wiki_PolyInfo_00/ClassHiR.Rds")

# Combined Japanese labels
Labels <- readRDS(file = "./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")

4 Linking of input terms to the Wikidata labels

The input is 510 terms in Japanese.

# Show data
head(words)
head(Labels)

# Link the terms to Wikidata entities by complete lexical matching of the
# label, then keep only entities that occur as a Subject in the hierarchy.
# %in% gives the same result with or without unique() on the lookup table,
# so the extra pass over the very large hierarchy column is dropped.
LabelsR <- Labels[Labels$Object %in% words, ]
LabelsR <- LabelsR[LabelsR$Subject %in% ClassHi$Subject, ]

# Check data
dim(LabelsR)
head(LabelsR)

# Renumber the rows; seq_len() also works when no row matched, where
# 1:nrow() would yield c(1, 0)
rownames(LabelsR) <- seq_len(nrow(LabelsR))
dim(LabelsR)

# Obtain the distinct subject QIDs
QID.d <- data.frame(QID = unique(LabelsR$Subject))

5 Check relations via SPARQL query

This step executes SPARQL queries.

The input consists of 247 entities.

5.1 excluding the adjacent entity

## Proxy setting if you need it, e.g.
#proxy_url = "http://wwwproxy.osakac.ac.jp:8080"; ProxySet(proxy_url)

# Set parameters: QIDs whose adjacency excludes a search entity
ExcluQ <- c("Q101352", "Q16521", "Q11424", "Q6256",
            "Q12308941", "Q11879590", "Q5", "Q5327369",
            "Q577", "Q4167410", "Q3305213", "Q13406463",
            "Q35120", "Q3695082", "Q131257", "Q80071",
            "Q4830453", "Q482994", "Q20202269", "Q2996394")
NumQ <- length(ExcluQ)

# Run.
# NOTE(review): SearchEntity() takes no arguments, so it presumably reads
# QID.d, ExcluQ and NumQ from the global environment; verify in the sourced script.
QID.d <- SearchEntity()

# Remove hits: keep rows whose columns after the first sum to zero.
# rowSums() replaces apply() with a wrapper lambda, and drop = FALSE keeps
# the data-frame shape even if only one hit column is present.
QID.d <- QID.d[rowSums(QID.d[, -1, drop = FALSE]) == 0, ]

# Show results
dim(QID.d)
head(QID.d)

5.2 excluding the particular property

# Source the helper script bundled with agGraphSearch
fp <- system.file("script", "srachWikidataPolyInfo.R", package = "agGraphSearch")
# system.file() returns "" when the package or file is missing
if (!nzchar(fp)) {
  stop("srachWikidataPolyInfo.R was not found in the agGraphSearch package",
       call. = FALSE)
}
source(fp)

# Set parameters: properties whose presence excludes a search entity
ExcluP <- c("P131", "P21", "P747", "P4743", "P577")
NumP <- length(ExcluP)

# Run.
# NOTE(review): SearchProperty() takes no arguments, so it presumably reads
# QID.d, ExcluP and NumP from the global environment; verify in the sourced script.
QID.d <- SearchProperty()

# Remove hits: keep rows whose columns after the first sum to zero.
# drop = FALSE keeps the data-frame shape even with a single hit column.
QID.d <- QID.d[rowSums(QID.d[, -1, drop = FALSE]) == 0, ]

# Show results
dim(QID.d)
head(QID.d)

#Save
#saveRDS(QID.d, "./04_Wiki_PolyInfo_XX/QID.d.Rds")

#Re-load
#QID.d <- readRDS("./04_Wiki_PolyInfo_XX/QID.d.Rds")

5.3 post-processing for search entities

# Renumber the rows (seq_len() is safe for an empty table)
rownames(QID.d) <- seq_len(nrow(QID.d))

# Keep only the first column. Selecting it directly gives the same result as
# dropping columns 2..ncol, and still works if QID.d has a single column,
# where 2:ncol(QID.d) would count down to c(2, 1).
QID.d01 <- QID.d[, 1]

# The number of QIDs
length(QID.d01)
head(QID.d01)

6 Searching upper concepts from the search entities

Here the upper-level searches are performed.

#show labels
# Keep the rdfs:label rows of Labels whose Subject is one of the search QIDs.
# NOTE(review): this data frame reuses the name SearchEntity, which section 5.1
# calls as a function; confirm the shadowing is intended.
SearchEntity <- Labels[Labels$Subject %in% QID.d01 & Labels$Property == "rdfs:label",]
head(SearchEntity); dim(SearchEntity)

##################################################################
#Upper search up to 30 steps
##################################################################
#Need to source "srachWikidataPolyInfo.R" script

#set parameters
# NOTE(review): upperSearch() is called without the QID list, so it presumably
# reads QID.list (and the hierarchy data) from the global environment; verify.
QID.list <- QID.d01

#run
QID.dat <- upperSearch(Depth = 30)
length(QID.dat)

#Save (optional; later sections re-load these files from 04_Wiki_PolyInfo_XX)
#saveRDS(QID.dat, "./04_Wiki_PolyInfo_XX/QID.dat.Rds")
#saveRDS(QID.list, "./04_Wiki_PolyInfo_XX/QID.list.Rds")

#Re-load
#QID.dat <- readRDS("./04_Wiki_PolyInfo_XX/QID.dat.Rds")

6.1 Visualize Individual network diagrams.

#Need to source "srachWikidataPolyInfo.R" script

#run
# NOTE(review): VisualizeGraph() takes no arguments; it presumably reads the
# search results from the global environment and writes the diagrams to the
# 04_Wiki_PolyInfo_01 folder. Confirm in the sourced script.
VisualizeGraph()

Check the results in the folder (04_Wiki_PolyInfo_01)

6.2 Calculate the common upper entities

Here we calculate the lower-level search entities and then identify the common upper entities.

#QID.dat <- readRDS("./04_Wiki_PolyInfo_XX/QID.dat.Rds")
#Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
#str(QID.dat)

#################################################
# Calculate the common upper entities
#################################################
# Need to source "srachWikidataPolyInfo.R" script

# calcCU() returns a list; its QID.dat and CUcounts elements are used below
QID.dat.res <- calcCU()
str(QID.dat.res)

# Select QID.dat
QID.dat_02 <- QID.dat.res$QID.dat

head(QID.dat_02)
dim(QID.dat_02)
# Renumber the rows (seq_len() is safe for an empty table)
rownames(QID.dat_02) <- seq_len(nrow(QID.dat_02))

## Check NA
table(is.na(QID.dat_02))
table(is.na(QID.dat_02$Subject))
table(is.na(QID.dat_02$Object))

#################################################
## Visualize entire network diagrams
#################################################
# Merge labels into the triple table
QID.datR <- MergeWikiData(Data = QID.dat_02,
                          Labels = Labels)
head(QID.datR)

# Drop self-referencing rows, then make node labels unique by appending the ID
QID.datR <- QID.datR[QID.datR$subject != QID.datR$parentClass, ]
QID.datR$subjectLabel <- paste0(QID.datR$subjectLabel, ".", QID.datR$subject)
QID.datR$parentClassLabel <- paste0(QID.datR$parentClassLabel, ".", QID.datR$parentClass)
head(QID.datR)

# Write the network diagram to the working directory without opening it ...
agGraphSearch:::agVisNetwork(Graph = QID.datR,
                             NodeColorRandom = FALSE, Count = 2,
                             StarSize = 10,
                             Output = TRUE, Browse = FALSE,
                             FilePath = "./agVisNetwork_entire.html")
# ... then move it into the results folder, replacing any earlier copy
suppressMessages(filesstrings::file.move(files = "./agVisNetwork_entire.html",
                        destinations = "./04_Wiki_PolyInfo_02", overwrite = TRUE))

#################################################
# Count entities and triples
#################################################
# Entities: distinct IDs that appear on either side of a triple
triple_ends <- c(QID.dat_02$Subject, QID.dat_02$Object)
E01 <- sum(!duplicated(triple_ends))
E01
#[1] 763

# Triples: distinct values of the triple column
E02 <- sum(!duplicated(QID.dat_02$triple))
E02
#[1] 1292

#################################
# Calculate CU entities
#################################
# Select CUcounts
CUcounts00 <- QID.dat.res$CUcounts

# Frequency of each value, most frequent first. rev(order()) is kept as is
# because order(decreasing = TRUE) would arrange ties differently.
cu_table <- table(CUcounts00)
cu_table <- cu_table[rev(order(cu_table))]
cu_table1 <- data.frame(parentClass = names(cu_table),
                        Freq = as.numeric(cu_table),
                        row.names = seq_along(cu_table),
                        stringsAsFactors = FALSE)

head(cu_table1)

# Attach the rdfs:label rows (left join on the entity ID)
cu_table2 <- merge(cu_table1,
                   Labels[Labels$Property == "rdfs:label", ],
                   by.x = "parentClass", by.y = "Subject",
                   all.x = TRUE, all.y = FALSE, sort = FALSE)

# Keep entities counted more than once
cu_table3 <- cu_table2[cu_table2$Freq > 1, ]
head(cu_table3)
tail(cu_table3)

# CU entities
dim(cu_table3)
#[1] 346   5

#Save
#saveRDS(cu_table3, "./04_Wiki_PolyInfo_XX/cu_table3.Rds")

#re-load
#cu_table3 <- readRDS("./04_Wiki_PolyInfo_XX/cu_table3.Rds")

6.3 Graph analysis algorithm

#QID.dat <- readRDS("./04_Wiki_PolyInfo_XX/QID.dat.Rds")
#Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
#str(QID.dat)

# Run pre-processing
# Need to source "srachWikidataPolyInfo.R" script
graph_Dat <- MeargeDatasets()
str(graph_Dat)

#Save
#saveRDS(graph_Dat, "./04_Wiki_PolyInfo_XX/graph_Dat.Rds")

#re-load
#graph_Dat <- readRDS("./04_Wiki_PolyInfo_XX/graph_Dat.Rds")

# Calculation
SearchRnage <- agGraphAnalysis(graphList = graph_Dat$graph_List,
                               search_List = graph_Dat$search_List,
                               CU_List = graph_Dat$CU_List)
#system("mv -f ./Results_* ./04_Wiki_PolyInfo_02")

# Levels: keep entities that received a level, ordered by level
SearchRnage00 <- SearchRnage[!is.na(SearchRnage$Levels), ]
SearchRnage00 <- SearchRnage00[order(SearchRnage00$Levels), ]
rownames(SearchRnage00) <- seq_len(nrow(SearchRnage00))
head(SearchRnage00)
tail(SearchRnage00)

# Merge labels (left join on the entity ID), then restore the level order
SearchRnage01 <- merge(SearchRnage00,
                       Labels[Labels$Property == "rdfs:label", ],
                       by.x = "CommonEntity", by.y = "Subject",
                       all.x = TRUE, all.y = FALSE, sort = FALSE)
SearchRnage01 <- SearchRnage01[order(SearchRnage01$Levels), ]
rownames(SearchRnage01) <- seq_len(nrow(SearchRnage01))
SearchRnage01$Property <- "rdfs:label"

# Entities without a label fall back to their own ID
no_label <- is.na(SearchRnage01$Object)
SearchRnage01$Object[no_label] <- SearchRnage01$CommonEntity[no_label]
head(SearchRnage01, n = 15)
tail(SearchRnage01, n = 15)

#Save
#saveRDS(SearchRnage01, "./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
#readr::write_excel_csv(SearchRnage01, "./04_Wiki_PolyInfo_XX/SearchRnage01.csv")

#Re-load
#SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")

8 Calculation & Evaluation for all candidates

#rm(list=ls())
#SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
# SearchRnage01 must already be in the workspace (built in section 6.3 or
# re-loaded with the commented line above).
dim(SearchRnage01)

#run
# NOTE(review): mergeECUsearch() takes no arguments, so it presumably reads
# SearchRnage01 and the hierarchy data from the global environment; confirm.
mergeECUsearch()

Outputs: ECU.list.Rds and TotalData.Rds

ECU.list: the number of unique triples (UniTripleNum) and the number of unique entities (UniEntityNum) for each ECU entity.

TotalData: Total sets of search results

9 For Evaluation

#ECU.list <- readRDS("./04_Wiki_PolyInfo_XX/ECU.list.Rds")
#SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")

str(ECU.list)

# Empty result table with one row per level; CountDat is reassigned from
# EvaluationAll() below.
CountDat <- data.frame(No = seq_len(max(ECU.list$Levels)),
                       UniEntityNum = NA,
                       UniTripleNum = NA,
                       UniJpnLab = NA,
                       AccEntityNum = NA,
                       AccTripleNum = NA,
                       AccJpnLab = NA,
                       ExtractedNum = NA,
                       RateAccEntity = NA,
                       TotalAnswerNum = NA)

## Evaluation
#Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
#OCR <- unlist(readr::read_csv("./高分子辞典OCR/高分子辞典OCR_2column_jpn_RRR.csv", col_names=F, show_col_types = FALSE), use.names = F)

# The OCR term list is only loaded by the commented-out line above, so fail
# with an actionable message instead of "object 'OCR' not found".
if (!exists("OCR")) {
  stop("`OCR` is not defined: load the OCR term list (see the commented read_csv() call) first",
       call. = FALSE)
}

# Reference terms that have at least one matching Wikidata label
OCR.eva <- OCR[OCR %in% Labels$Object]
length(OCR.eva)

# step 03
# Evaluation
CountDat <- EvaluationAll()
CountDat

10 Refinement for the ontology candidate

#rm(list=ls())
# Re-load the inputs of the refinement from disk.
# NOTE(review): the files under 04_Wiki_PolyInfo_XX are only written by
# saveRDS() calls that are commented out in the earlier sections, and no
# visible line writes CU_List.Rds; make sure they exist before running.
SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
ClassHi <- readRDS("./04_Wiki_PolyInfo_00/ClassHiR.Rds")
# Only the QID column of the saved search-entity table is kept
QID.id <- readRDS("./04_Wiki_PolyInfo_XX/QID.d.Rds")$QID
CU_List <- readRDS("./04_Wiki_PolyInfo_XX/CU_List.Rds")

#Refinement: get the direction of the search entity
# NOTE(review): runRefinement() takes no arguments; it presumably reads the
# objects loaded above from the global environment. Confirm in the sourced script.
runRefinement()

11 Processing after the refinement

#system(paste0("rm -rf ./04_Wiki_PolyInfo_06/*"))
#ECU.list <- readRDS("./04_Wiki_PolyInfo_XX/ECU.list.Rds")
head(ECU.list)
k <- max(ECU.list$Levels)

## step 01
# For every level m: concatenate the refined triples of all entries on that
# level into one CSV, then write a de-duplicated copy ("...L<m>R.csv").
for (m in seq_len(k)) {
  message("m: ", m)
  d <- ECU.list[ECU.list$Levels == m, ]
  # Row names are used as positions in ECU.list and as the file number below.
  # NOTE(review): this assumes ECU.list row names are 1..n; confirm.
  d1 <- as.numeric(rownames(d))

  level_csv <- paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", m, ".csv")
  # Start from a clean file because the inner loop appends. unlink() is the
  # portable equivalent of the shell "rm -rf".
  unlink(level_csv, recursive = TRUE, force = TRUE)

  for (n in d1) {
    message("n: ", n)
    Dat <- readRDS(paste0("./04_Wiki_PolyInfo_05/",
                          formatC(n, width = 4, flag = "0"),
                          "_L", ECU.list$Levels[n], "_",
                          ECU.list$CommonEntity[n], "_refine.Rds"))
    readr::write_csv(Dat, level_csv, append = TRUE, col_names = FALSE)
  }

  temp <- data.frame(readr::read_csv(level_csv,
                                     show_col_types = FALSE, col_names = FALSE))
  colnames(temp) <- c("Subject", "Property", "Object")
  temp$triple <- paste0(temp$Subject, ".", temp$Property, ".", temp$Object)
  # Keep the first occurrence of every distinct triple
  temp <- temp[!duplicated(temp$triple), ]
  readr::write_csv(temp,
                   paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", m, "R.csv"),
                   append = FALSE, col_names = FALSE)
}

# Remove accumulated files left over from an earlier run, because step 02
# appends to them. Sys.glob() returns an empty vector when nothing matches.
unlink(Sys.glob("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L*R_Acc.csv"),
       recursive = TRUE, force = TRUE)

## step 02
# Append the triples of level j to the accumulated file of every level
# l >= j, so "...L<l>R_Acc.csv" ends up holding levels 1..l.
for (j in seq_len(k)) {
  message("j: ", j)
  temp <- data.frame(readr::read_csv(paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", j, "R.csv"),
                                     show_col_types = FALSE, col_names = FALSE))
  for (l in j:k) {
    message("l: ", l)
    readr::write_csv(temp,
                     paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", l, "R_Acc.csv"),
                     append = TRUE, col_names = FALSE)
  }
}

12 For Evaluation

#rm(list=ls())
#ECU.list <- readRDS("./04_Wiki_PolyInfo_XX/ECU.list.Rds")
head(ECU.list)
k <- max(ECU.list$Levels)

# Result table with one row per level, filled in by the loop below
CountDatRefine <- data.frame(No = seq_len(k),
                             UniEntityNum = NA,
                             UniTripleNum = NA,
                             UniJpnLab = NA,
                             AccEntityNum = NA,
                             AccTripleNum = NA,
                             AccJpnLab = NA,
                             ExtractedNum = NA,
                             RateAccEntity = NA,
                             TotalAnswerNum = NA)

#Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")
#OCR <- unlist(readr::read_csv("./高分子辞典OCR/高分子辞典OCR_2column_jpn_RRR.csv", col_names=F, show_col_types = FALSE), use.names = F)

# The OCR term list is only loaded by the commented-out line above, so fail
# with an actionable message instead of "object 'OCR' not found".
if (!exists("OCR")) {
  stop("`OCR` is not defined: load the OCR term list (see the commented read_csv() call) first",
       call. = FALSE)
}

# Reference terms that have at least one matching Wikidata label
OCR.eva <- OCR[OCR %in% Labels$Object]
length(OCR.eva)

# Read a header-less four-column triple CSV and keep the first row of every
# distinct value in its "triple" column.
read_unique_triples <- function(path) {
  triples <- data.frame(readr::read_csv(path, show_col_types = FALSE, col_names = FALSE))
  colnames(triples) <- c("Subject", "Property", "Object", "triple")
  triples[!duplicated(triples$triple), ]
}

## step 03
for (n in seq_len(k)) {
  message("n: ", n)

  # Uni: triples of level n only
  temp <- read_unique_triples(
    paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", n, "R.csv"))
  message("Uni")
  UniEntity <- unique(c(temp$Subject, temp$Object))
  CountDatRefine$UniEntityNum[n] <- length(UniEntity)
  CountDatRefine$UniTripleNum[n] <- length(unique(temp$triple))

  # Jpn: entities that have at least one row in Labels
  LabelsNum <- Labels[Labels$Subject %in% UniEntity, ]
  CountDatRefine$UniJpnLab[n] <- length(unique(LabelsNum$Subject))

  # Acc: triples read from the accumulated file of level n
  temp <- read_unique_triples(
    paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", n, "R_Acc.csv"))
  readr::write_csv(temp,
                   paste0("./04_Wiki_PolyInfo_06/Refinement_OutTriples_L", n, "R_Acc_Uni.csv"),
                   append = FALSE, col_names = FALSE)

  message("Acc")
  UniEntity <- unique(c(temp$Subject, temp$Object))
  CountDatRefine$AccEntityNum[n] <- length(UniEntity)
  CountDatRefine$AccTripleNum[n] <- length(unique(temp$triple))

  # Jpn
  LabelsNum <- Labels[Labels$Subject %in% UniEntity, ]
  CountDatRefine$AccJpnLab[n] <- length(unique(LabelsNum$Subject))

  # OCR: distinct labels of the accumulated entities that are reference terms.
  # LabelsNum already holds the label rows of exactly these entities.
  message("OCR")
  Eva01 <- unique(LabelsNum$Object)
  n_hit <- sum(Eva01 %in% OCR.eva)

  CountDatRefine$ExtractedNum[n] <- n_hit
  CountDatRefine$RateAccEntity[n] <- round(n_hit / length(OCR.eva) * 100, 3)
  CountDatRefine$TotalAnswerNum[n] <- length(OCR.eva)
  print(CountDatRefine)
}

SearchRnage01 <- readRDS("./04_Wiki_PolyInfo_XX/SearchRnage01.Rds")
dim(SearchRnage01)

# 01 Barplot: number of entries per level
level_counts <- table(SearchRnage01$Levels)
SearchRnage02 <- data.frame(No = names(level_counts),
                            each_entity = as.numeric(level_counts))
head(SearchRnage02)

# ECU: ratios in percent, plus their natural logarithms (logb() default base)
CountDatRefine$RateAccEntityPrec <- round((CountDatRefine$ExtractedNum / CountDatRefine$AccEntityNum)*100, 5)
CountDatRefine$RateAccEntityPrecLog <- round(logb((CountDatRefine$ExtractedNum / CountDatRefine$AccEntityNum)*100), 5)
CountDatRefine$RateAccJpnPrec <- round((CountDatRefine$ExtractedNum / CountDatRefine$AccJpnLab)*100, 5)
CountDatRefine$RateAccJpnPrecLog <- round(logb((CountDatRefine$ExtractedNum / CountDatRefine$AccJpnLab)*100), 5)

CountDatRefine$each_entity <- SearchRnage02$each_entity[1:k]
CountDatRefine$sum_entity <- cumsum(SearchRnage02$each_entity)[1:k]

readr::write_csv(CountDatRefine,
                 "./04_Wiki_PolyInfo_XX/CountDatRefine.csv", append = FALSE, col_names = TRUE)

13 Plots of results

library(magrittr)
library(plotly)

# Evaluation tables. CountDatRefine.csv is written at the end of section 12.
# NOTE(review): no visible line writes CountDat.csv; presumably it is produced
# by EvaluationAll() or saved by hand. Confirm before running.
CountDat <- data.frame(readr::read_csv("./04_Wiki_PolyInfo_XX/CountDat.csv", show_col_types = FALSE, col_names = TRUE))
CountDat
CountDatRefine <- data.frame(readr::read_csv("./04_Wiki_PolyInfo_XX/CountDatRefine.csv", show_col_types = FALSE, col_names = TRUE))
CountDatRefine

######################################################
# Shared styling and helpers
######################################################
fig_margin <- list(l = 50, r = 10, b = 50, t = 50, pad = 4)

# Marker styles: blue = all candidates, orange = refinement
bar_blue   <- list(color = "rgb(158,202,225)", line = list(color = "rgb(8,48,107)", width = 0.25))
bar_orange <- list(color = "rgb(255,158,72)", line = list(color = "rgb(8,48,107)", width = 0.25))
pt_blue    <- list(color = "rgb(158,202,225)", size = 8, line = list(color = "rgb(8,48,107)", width = 0.5))
pt_orange  <- list(color = "rgb(255,128,0)", size = 8, line = list(color = "rgb(8,48,107)", width = 0.5))

# Axis definitions
bar_xaxis  <- list(title = "NES", ticks = "outside", linewidth = 0, tick0 = 1, dtick = 1)
line_xaxis <- list(title = "NES range", ticks = "outside", linewidth = 1, tick0 = 0, dtick = 1)
y_free     <- list(title = "Entity number", linewidth = 1)
y_fixed    <- list(title = "Entity number", linewidth = 1, range = c(0, 70000000))

# Bar chart of column `y` (a one-sided formula) against the No column.
nes_bar <- function(data, y, name, marker) {
  plotly::plot_ly(data, x = ~No, y = y, type = "bar", name = name,
                  hovertemplate = "%{y}", marker = marker)
}

# Apply the common bar-chart layout; `...` forwards extra layout attributes
# such as bargap.
nes_bar_layout <- function(fig, title, yaxis, ...) {
  plotly::layout(fig, title = title, ...,
                 yaxis = yaxis, xaxis = bar_xaxis, margin = fig_margin)
}

# Grouped bar chart: all candidates (`y`) next to the refinement (`y_refine`).
nes_bar_pair <- function(data, y, y_refine, title, yaxis) {
  fig <- nes_bar(data, y, name = "All candidates", marker = bar_blue)
  fig <- fig %>% plotly::add_trace(y = y_refine, name = "Refinement",
                                   type = "bar", marker = pt_orange)
  nes_bar_layout(fig, title, yaxis, bargap = 0.1, bargroupgap = 0.1)
}

# Line chart comparing all candidates (`y`) with the refinement (`y_refine`).
# The second trace inherits the data and x mapping of the first one.
nes_line_pair <- function(data, y, y_refine, yaxis) {
  fig <- plotly::plot_ly(data, x = ~No, y = y,
                         type = "scatter", mode = "lines+markers", name = "All candidates",
                         hovertemplate = "%{y}", marker = pt_blue)
  fig <- fig %>% plotly::add_trace(y = y_refine, name = "Refinement",
                                   mode = "lines+markers", marker = pt_orange)
  plotly::layout(fig, title = "", yaxis = yaxis, xaxis = line_xaxis, margin = fig_margin)
}

# Save `fig` as "Results_<yymmdd>_<tag>.html" in the working directory and
# then move it into `out_dir` (created if missing). The move uses
# file.rename() instead of the shell "mv", so it needs no external command.
# Returns the destination path invisibly.
save_results_html <- function(fig, tag, out_dir = "./04_Wiki_PolyInfo_04") {
  if (!dir.exists(out_dir)) {
    dir.create(out_dir)
  }
  file_name <- paste0("./Results_", format(Sys.time(), "%y%m%d"), "_", tag, ".html")
  htmlwidgets::saveWidget(plotly::as_widget(fig), file = file_name)
  dest <- file.path(out_dir, basename(file_name))
  if (!file.rename(file_name, dest)) {
    warning("Could not move ", file_name, " to ", out_dir, call. = FALSE)
  }
  invisible(dest)
}

######################################################
# Barplot 01
######################################################
fig <- nes_bar(CountDat, ~each_entity, name = "", marker = bar_blue)
fig <- nes_bar_layout(fig, "Number of ECU entities", y_free)
fig
save_results_html(fig, "01")

######################################################
# Barplot 02
######################################################
fig <- nes_bar(CountDat, ~UniEntityNum, name = "", marker = bar_blue)
fig <- nes_bar_layout(fig, "Number of concepts per ENS (with dup.)", y_fixed)
fig
save_results_html(fig, "02")

######################################################
# Barplot 03
######################################################
fig <- nes_bar(CountDat, ~AccEntityNum, name = "", marker = bar_blue)
fig <- nes_bar_layout(fig, "Unique cumulative number of concepts", y_fixed)
fig
save_results_html(fig, "03")

######################################################
# Plot 01: Recall
######################################################
# Refinement results are copied into CountDat so both series share one table
CountDat$Refine.RateAccEntity <- CountDatRefine$RateAccEntity
CountDat$Refine.RateAccJpnPrec <- CountDatRefine$RateAccJpnPrec

fig <- nes_line_pair(CountDat, ~RateAccEntity, ~Refine.RateAccEntity,
                     yaxis = list(title = "Recall rate (%)", linewidth = 1,
                                  tick0 = 0, dtick = 10, range = c(0, 80), zeroline = FALSE))
fig
save_results_html(fig, "04R")

######################################################
# Plot 02: Precision
######################################################
fig <- nes_line_pair(CountDat, ~RateAccJpnPrec, ~Refine.RateAccJpnPrec,
                     yaxis = list(title = "Precision rate (%)", linewidth = 1,
                                  tick0 = 0, dtick = 1, type = "log"))
fig
save_results_html(fig, "05R")

######################################################
# CountDatRefine: Barplot 02
######################################################
fig <- nes_bar(CountDatRefine, ~UniEntityNum, name = "", marker = bar_orange)
fig <- nes_bar_layout(fig, "Number of concepts per ENS (with dup.)", y_fixed)
fig
save_results_html(fig, "06R")

######################################################
# CountDatRefine: Barplot 03
######################################################
fig <- nes_bar(CountDatRefine, ~AccEntityNum, name = "", marker = bar_orange)
fig <- nes_bar_layout(fig, "Unique cumulative number of concepts", y_fixed)
fig
save_results_html(fig, "07R")

######################################################
# Barplot 04
######################################################
CountDat$Refine.each_entity <- CountDatRefine$each_entity
CountDat$Refine.UniEntityNum <- CountDatRefine$UniEntityNum
CountDat$Refine.AccEntityNum <- CountDatRefine$AccEntityNum

fig <- nes_bar_pair(CountDat, ~each_entity, ~Refine.each_entity,
                    "Number of ECU entities", y_free)
fig
save_results_html(fig, "01R")

######################################################
# Barplot 05
######################################################
fig <- nes_bar_pair(CountDat, ~UniEntityNum, ~Refine.UniEntityNum,
                    "Number of concepts per ENS (with dup.)", y_fixed)
fig
save_results_html(fig, "02R")

######################################################
# Barplot 06
######################################################
fig <- nes_bar_pair(CountDat, ~AccEntityNum, ~Refine.AccEntityNum,
                    "Unique cumulative number of concepts", y_fixed)
fig
save_results_html(fig, "03R")

14 Convert them to RDF format

In this step, the results extracted from Wikidata are converted to RDF.

Unnecessary strings were removed.

#QID.list <- readRDS("./04_Wiki_PolyInfo_XX/QID.list.Rds")
#Labels <- readRDS("./04_Wiki_PolyInfo_00/Label_ja_All_label_df.Rds")

#Read All candidates
# NOTE(review): presumably writes the N-Triples file named by FileName; the
# meaning of N = 7 is not visible here (possibly the level range to export).
# Confirm against the definition of convRDF_all().
convRDF_all(N = 7,
            FileName="./04_Wiki_PolyInfo_07/RDF_from_Wikidat_all_v02.nt")

#Read All data after refinement
# Same call shape as above, for the refined results; output goes to a
# separate .nt file.
convRDF_refinement(N = 7,
                   FileName="./04_Wiki_PolyInfo_07/RDF_from_Wikidat_refine_v02.nt")

Session information

## R version 4.2.2 (2022-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Big Sur 11.5.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] BiocStyle_2.26.0
## 
## loaded via a namespace (and not attached):
##  [1] bookdown_0.30       digest_0.6.30       R6_2.5.1           
##  [4] jsonlite_1.8.3      magrittr_2.0.3      evaluate_0.18      
##  [7] stringi_1.7.8       cachem_1.0.6        rlang_1.0.6        
## [10] cli_3.4.1           rstudioapi_0.14     jquerylib_0.1.4    
## [13] bslib_0.4.1         rmarkdown_2.18      tools_4.2.2        
## [16] stringr_1.4.1       xfun_0.35           yaml_2.3.6         
## [19] fastmap_1.1.0       compiler_4.2.2      BiocManager_1.30.19
## [22] htmltools_0.5.3     knitr_1.41          sass_0.4.4