# Load necessary packages.
# pacman::p_load() is called with the package names as bare symbols.
pacman::p_load(
  here, qs,
  magrittr, janitor,
  naniar, visdat,
  easystats, sjmisc,
  ggpubr,
  gt, gtExtras, gtsummary,
  openalexR, bibliometrix,
  tidyverse
)
Combine OpenAlex & Scopus
Projektseminar
Preparation
Comparison with Scopus
# Load the stored OpenAlex reference object and attach the raw Scopus
# records to it under `references$scopus$raw`.
references <- qs::qread(here("local_data/references_openalex.qs"))
references$scopus$raw <- qs::qread(here("local_data/references_scopus.qs"))$raw
Identification of “missing” references
# Identify the Scopus references (with a DOI) that are missing in the
# OpenAlex data. DOIs are case-insensitive, so both sides are lower-cased
# before comparison to avoid flagging records that differ only in case.
references$scopus$raw %>%
  filter(!is.na(doi)) %>%
  mutate(doi_full = paste0("https://doi.org/", doi)) %>%
  filter(!(tolower(doi_full) %in% tolower(references$openalex$raw$doi))) %>%
  glimpse()
Rows: 4,819
Columns: 17
$ scopusID <chr> "2-s2.0-85208654899", "2-s2.0-85201379794", "2-s2.0-85209…
$ doi <chr> "10.1016/j.csi.2024.103940", "10.1016/j.csi.2024.103903",…
$ pmid <chr> NA, NA, NA, NA, NA, NA, NA, NA, "39389117", NA, NA, NA, N…
$ authors <chr> "Alier M.|Pereira J.|García-Peñalvo F.J.|Casañ M.J.|Cabré…
$ affiliations <chr> "Universidad de Salamanca|Universitat Politécnica de Cata…
$ countries <chr> "Spain", "Russian Federation|Uzbekistan", "South Korea|Ho…
$ year <chr> "2025", "2025", "2025", "2025", "2025", "2025", "2025", "…
$ articletitle <chr> "LAMB: An open-source software framework to create artifi…
$ journal <chr> "Computer Standards and Interfaces", "Computer Standards …
$ volume <chr> "92", "92", "210", "194", "163", "149", "95", "106", "369…
$ issue <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, "1", NA, "1", "1", NA…
$ pages <chr> "", "", "", "", "", "", "", "", "625-632", "", "", "", "2…
$ keywords <chr> "Education domain|Generative artificial intelligence|IMS …
$ abstract <chr> "This paper presents LAMB (Learning Assistant Manager and…
$ ptype <chr> "Article", "Article", "Article", "Article", "Article", "A…
$ timescited <chr> "0", "0", "0", "0", "0", "0", "2", "1", "0", "0", "0", "0…
$ doi_full <chr> "https://doi.org/10.1016/j.csi.2024.103940", "https://doi…
Extract DOIs for missing references
# Container for everything related to the references missing in OpenAlex
missing_references <- list()

# Format DOIs: build the full DOI URL for each Scopus record and keep only
# those not yet present in the OpenAlex data. The comparison is done on
# lower-cased values because DOIs are case-insensitive; the DOI strings that
# are kept are left unchanged. Repeated DOIs are dropped so that no DOI is
# requested twice.
missing_references$scopus_dois <- references$scopus$raw %>%
  filter(!is.na(doi)) %>%
  mutate(doi = paste0("https://doi.org/", doi)) %>%
  filter(!(tolower(doi) %in% tolower(references$openalex$raw$doi))) %>%
  pull(doi) %>%
  unique()
# Split DOIs into chunks of `chunk_size` (currently 10) elements each;
# the last chunk may be shorter.
chunk_size <- 10
missing_references$scopus_dois_chunks <- split(
  missing_references$scopus_dois,
  ceiling(seq_along(missing_references$scopus_dois) / chunk_size)
)
Completion of OpenAlex data
Mining missing references via OpenAlex API
# Download missing references via API, one request per DOI chunk.
# A failed chunk is reported via message() and yields NULL, so one failing
# request does not abort the whole download.
missing_references$data$chunks <- map(
  missing_references$scopus_dois_chunks,
  function(chunk) {
    Sys.sleep(2) # Pause for 2 seconds between requests
    tryCatch(
      {
        # Attempt the API call
        openalexR::oa_fetch(
          entity = "works",
          doi = chunk,
          verbose = TRUE
        )
      },
      error = function(e) {
        # Report the failing chunk and the error, then continue
        message("Error with chunk: ", paste(chunk, collapse = ", "))
        message("Error message: ", conditionMessage(e))
        NULL # Return NULL for failed chunks
      }
    )
  }
)
# Combine the per-chunk results into one data frame and tag every row with
# the source of the download.
missing_references$data$combined <- bind_rows(missing_references$data$chunks) %>%
  mutate(mining_source = "openalex_rerun_doi")

# Store the complete object (DOIs, chunks, downloaded data) on disk
qs::qsave(missing_references, file = here("local_data/missing_references.qs"))
Quality control
# Check for duplicates based on OpenAlex ID:
# count rows per ID, then tabulate how often each count occurs.
missing_references$data$combined %>%
  count(id) %>%
  frq(n, sort.frq = "desc")
n <integer>
# total N=4736 valid N=4736 mean=1.00 sd=0.00
Value | N | Raw % | Valid % | Cum. %
---------------------------------------
1 | 4736 | 100 | 100 | 100
<NA> | 0 | 0 | <NA> | <NA>
# Check for duplicates based on DOI:
# count rows per DOI, then tabulate how often each count occurs.
missing_references$data$combined %>%
  distinct(id, .keep_all = TRUE) %>% # exclude ID duplicates
  filter(!is.na(doi)) %>% # exclude cases without DOI
  count(doi) %>%
  frq(n, sort.frq = "desc")
n <integer>
# total N=4734 valid N=4734 mean=1.00 sd=0.02
Value | N | Raw % | Valid % | Cum. %
---------------------------------------
1 | 4732 | 99.96 | 99.96 | 99.96
2 | 2 | 0.04 | 0.04 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
# Container for all duplicate checks
duplicates <- list()

# Extract duplicated DOIs: DOIs that occur more than once after ID
# duplicates and records without a DOI have been removed.
duplicates$missing_references$combined$doi$string <-
  missing_references$data$combined %>%
  distinct(id, .keep_all = TRUE) %>%
  filter(!is.na(doi)) %>%
  count(doi) %>%
  filter(n > 1) %>%
  pull(doi)
# Extract cases with duplicated DOIs
duplicates$missing_references$combined$doi$data <-
  missing_references$data$combined %>%
  filter(doi %in% duplicates$missing_references$combined$doi$string)
# Extract cases to be deleted: per duplicated DOI, the record with the
# smallest numeric part of the OpenAlex work ID (the digits after the "W")
# is marked for deletion. `with_ties = FALSE` guarantees exactly one row per
# DOI. The result is ungrouped so no grouping leaks into later steps.
duplicates$missing_references$combined$doi$delete <-
  duplicates$missing_references$combined$doi$data %>%
  mutate(id_number = as.numeric(sub(".*W", "", id))) %>%
  group_by(doi) %>%
  slice_min(id_number, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  select(-id_number)
# Build the cleaned set of re-downloaded references: one row per OpenAlex ID,
# without the records marked for deletion in the DOI duplicate check.
missing_references$data$raw <- missing_references$data$combined %>%
  distinct(id, .keep_all = TRUE) %>% # delete duplicates based on ID
  anti_join(duplicates$missing_references$combined$doi$delete, by = "id")
Merging OpenAlex data
# Combine the missing references with the existing data.
# `mining_source` records which download each row comes from.
references$openalex$combined$api <- references$openalex$raw %>%
  mutate(mining_source = "openalex_initial") %>%
  bind_rows(missing_references$data$raw)
Quality control
# Summary statistics for every column of the combined data
references$openalex$combined$api %>%
  skimr::skim()
Name | Piped data |
Number of rows | 33254 |
Number of columns | 40 |
_______________________ | |
Column type frequency: | |
character | 24 |
list | 8 |
logical | 5 |
numeric | 3 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
id | 0 | 1.00 | 30 | 32 | 0 | 33079 | 0 |
title | 6 | 1.00 | 3 | 500 | 0 | 32287 | 0 |
display_name | 6 | 1.00 | 3 | 500 | 0 | 32287 | 0 |
ab | 2325 | 0.93 | 0 | 51771 | 11 | 30110 | 0 |
publication_date | 0 | 1.00 | 10 | 10 | 0 | 3052 | 0 |
so | 6257 | 0.81 | 1 | 244 | 0 | 8410 | 0 |
so_id | 6257 | 0.81 | 27 | 32 | 0 | 8436 | 0 |
host_organization | 13463 | 0.60 | 3 | 155 | 0 | 1308 | 0 |
issn_l | 11008 | 0.67 | 9 | 9 | 0 | 7690 | 0 |
url | 127 | 1.00 | 21 | 273 | 0 | 32942 | 0 |
pdf_url | 21198 | 0.36 | 29 | 359 | 0 | 11940 | 0 |
license | 22241 | 0.33 | 3 | 21 | 0 | 11 | 0 |
version | 14375 | 0.57 | 15 | 16 | 0 | 3 | 0 |
first_page | 13455 | 0.60 | 1 | 15 | 0 | 5079 | 0 |
last_page | 13563 | 0.59 | 1 | 15 | 0 | 5092 | 0 |
volume | 13865 | 0.58 | 1 | 25 | 0 | 684 | 0 |
issue | 17210 | 0.48 | 1 | 30 | 0 | 646 | 0 |
oa_status | 0 | 1.00 | 4 | 7 | 0 | 6 | 0 |
oa_url | 13664 | 0.59 | 20 | 359 | 0 | 19241 | 0 |
language | 2 | 1.00 | 2 | 2 | 0 | 8 | 0 |
cited_by_api_url | 0 | 1.00 | 53 | 55 | 0 | 33079 | 0 |
doi | 2024 | 0.94 | 26 | 96 | 0 | 31055 | 0 |
type | 0 | 1.00 | 6 | 12 | 0 | 6 | 0 |
mining_source | 0 | 1.00 | 16 | 18 | 0 | 2 | 0 |
Variable type: list
skim_variable | n_missing | complete_rate | n_unique | min_length | max_length |
---|---|---|---|---|---|
author | 253 | 0.99 | 32161 | 1 | 12 |
grants | 28882 | 0.13 | 2780 | 0 | 33 |
counts_by_year | 15199 | 0.54 | 5709 | 0 | 2 |
ids | 0 | 1.00 | 33079 | 1 | 5 |
referenced_works | 10700 | 0.68 | 22293 | 0 | 447 |
related_works | 403 | 0.99 | 23309 | 1 | 20 |
concepts | 0 | 1.00 | 33074 | 5 | 5 |
topics | 0 | 1.00 | 32528 | 0 | 5 |
Variable type: logical
skim_variable | n_missing | complete_rate | mean | count |
---|---|---|---|---|
is_oa | 124 | 1 | 0.53 | TRU: 17652, FAL: 15478 |
is_oa_anywhere | 0 | 1 | 0.59 | TRU: 19524, FAL: 13730 |
any_repository_has_fulltext | 0 | 1 | 0.29 | FAL: 23747, TRU: 9507 |
is_paratext | 0 | 1 | 0.00 | FAL: 33254 |
is_retracted | 0 | 1 | 0.00 | FAL: 33209, TRU: 45 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
relevance_score | 4734 | 0.86 | 9.91 | 26.91 | 0.04 | 0.84 | 3.13 | 7.26 | 986.22 | ▇▁▁▁▁ |
cited_by_count | 0 | 1.00 | 9.42 | 38.52 | 0.00 | 0.00 | 1.00 | 5.00 | 1878.00 | ▇▁▁▁▁ |
publication_year | 0 | 1.00 | 2021.81 | 2.57 | 1983.00 | 2021.00 | 2023.00 | 2024.00 | 2025.00 | ▁▁▁▁▇ |
Quick overview
- The number of missing abstracts has risen. Therefore, the Scopus data will be checked for the possibility of filling in the missing abstracts.
- The difference in the number of cases and the number of unique IDs indicates that there are duplicates in the data.
# Check for duplicates based on OpenAlex ID:
# count rows per ID, then tabulate how often each count occurs.
references$openalex$combined$api %>%
  count(id) %>%
  frq(n, sort.frq = "desc")
n <integer>
# total N=33079 valid N=33079 mean=1.01 sd=0.07
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
1 | 32904 | 99.47 | 99.47 | 99.47
2 | 175 | 0.53 | 0.53 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
# Extract duplicated IDs: OpenAlex IDs that occur more than once
duplicates$openalex$combined$id$string <- references$openalex$combined$api %>%
  count(id) %>%
  filter(n > 1) %>%
  pull(id)
# Extract cases with duplicated IDs, sorted by ID so that rows sharing an
# ID are adjacent.
duplicates$openalex$combined$id$data <- references$openalex$combined$api %>%
  filter(id %in% duplicates$openalex$combined$id$string) %>%
  arrange(id)
# Split the ID-sorted duplicates into odd rows (df1) and even rows (df2).
# This pairs the two copies of a record only if every duplicated ID occurs
# exactly twice. Selecting via row_number() also works when there are no
# duplicates at all, where seq(1, 0, by = 2) would raise an error.
df1 <- duplicates$openalex$combined$id$data %>%
  filter(row_number() %% 2 == 1)
df2 <- duplicates$openalex$combined$id$data %>%
  filter(row_number() %% 2 == 0)

# Compare the two data frames
summary(arsenal::comparedf(df1, df2))
Table: Summary of data.frames
version arg ncol nrow
-------- ---- ----- -----
x df1 40 175
y df2 40 175
Table: Summary of overall comparison
statistic value
------------------------------------------------------------ ------
Number of by-variables 0
Number of non-by variables in common 40
Number of variables compared 40
Number of variables in x but not y 0
Number of variables in y but not x 0
Number of variables compared with some values unequal 4
Number of variables compared with all values equal 36
Number of observations in common 175
Number of observations in x but not y 0
Number of observations in y but not x 0
Number of observations with some compared variables unequal 175
Number of observations with all compared variables equal 0
Number of values unequal 404
Table: Variables not shared
------------------------
No variables not shared
------------------------
Table: Other variables not compared
--------------------------------
No other variables not compared
--------------------------------
Table: Observations not shared
---------------------------
No observations not shared
---------------------------
Table: Differences detected by variable
var.x var.y n NAs
---------------------------- ---------------------------- ---- ----
id id 0 0
title title 0 0
display_name display_name 0 0
author author 0 0
ab ab 0 0
publication_date publication_date 0 0
relevance_score relevance_score 175 175
so so 0 0
so_id so_id 0 0
host_organization host_organization 0 0
issn_l issn_l 0 0
url url 0 0
pdf_url pdf_url 0 0
license license 0 0
version version 0 0
first_page first_page 0 0
last_page last_page 0 0
volume volume 0 0
issue issue 0 0
is_oa is_oa 0 0
is_oa_anywhere is_oa_anywhere 0 0
oa_status oa_status 0 0
oa_url oa_url 0 0
any_repository_has_fulltext any_repository_has_fulltext 0 0
language language 0 0
grants grants 50 50
cited_by_count cited_by_count 0 0
counts_by_year counts_by_year 4 4
publication_year publication_year 0 0
cited_by_api_url cited_by_api_url 0 0
ids ids 0 0
doi doi 0 0
type type 0 0
referenced_works referenced_works 0 0
related_works related_works 0 0
is_paratext is_paratext 0 0
is_retracted is_retracted 0 0
concepts concepts 0 0
topics topics 0 0
mining_source mining_source 175 0
Table: Differences detected (370 not shown)
var.x var.y ..row.names.. values.x values.y row.x row.y
---------------- ---------------- -------------- ----------------- ------------------- ------ ------
relevance_score relevance_score 1 8.167233 NA 1 1
relevance_score relevance_score 2 8.486296 NA 2 2
relevance_score relevance_score 3 280.2531 NA 3 3
relevance_score relevance_score 4 7.800102 NA 4 4
relevance_score relevance_score 5 7.819922 NA 5 5
relevance_score relevance_score 6 17.53957 NA 6 6
relevance_score relevance_score 7 1.024666 NA 7 7
relevance_score relevance_score 8 28.30165 NA 8 8
relevance_score relevance_score 9 30.97463 NA 9 9
relevance_score relevance_score 10 15.04404 NA 10 10
grants grants 7 NA NULL 7 7
grants grants 8 NA NULL 8 8
grants grants 10 NA NULL 10 10
grants grants 11 NA NULL 11 11
grants grants 14 NA NULL 14 14
grants grants 15 NA NULL 15 15
grants grants 18 NA NULL 18 18
grants grants 20 NA NULL 20 20
grants grants 21 NA NULL 21 21
grants grants 24 NA NULL 24 24
counts_by_year counts_by_year 166 NA NULL 166 166
counts_by_year counts_by_year 171 NA NULL 171 171
counts_by_year counts_by_year 172 NA NULL 172 172
counts_by_year counts_by_year 174 NA NULL 174 174
mining_source mining_source 1 openalex_initial openalex_rerun_doi 1 1
mining_source mining_source 2 openalex_initial openalex_rerun_doi 2 2
mining_source mining_source 3 openalex_initial openalex_rerun_doi 3 3
mining_source mining_source 4 openalex_initial openalex_rerun_doi 4 4
mining_source mining_source 5 openalex_initial openalex_rerun_doi 5 5
mining_source mining_source 6 openalex_initial openalex_rerun_doi 6 6
mining_source mining_source 7 openalex_initial openalex_rerun_doi 7 7
mining_source mining_source 8 openalex_initial openalex_rerun_doi 8 8
mining_source mining_source 9 openalex_initial openalex_rerun_doi 9 9
mining_source mining_source 10 openalex_initial openalex_rerun_doi 10 10
Table: Non-identical attributes
----------------------------
No non-identical attributes
----------------------------
# Check for duplicates based on DOI:
# count rows per DOI, then tabulate how often each count occurs.
references$openalex$combined$api %>%
  distinct(id, .keep_all = TRUE) %>% # exclude ID duplicates
  filter(!is.na(doi)) %>% # exclude cases without DOI
  count(doi) %>%
  frq(n, sort.frq = "desc")
n <integer>
# total N=31055 valid N=31055 mean=1.00 sd=0.00
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
1 | 31055 | 100 | 100 | 100
<NA> | 0 | 0 | <NA> | <NA>
# Keep one row per OpenAlex ID (the first occurrence in the combined data)
references$openalex$combined$raw <- references$openalex$combined$api %>%
  distinct(id, .keep_all = TRUE)
Missing abstracts
# Identify cases with NA values in the variable ab
na_abstracts <- references$openalex$combined$raw %>%
  filter(is.na(ab))
# Check if Scopus data provides an abstract for those references.
# The Scopus records are read from `references$scopus$raw` (a bare `scopus`
# object is never created in this script). The lookup table is restricted to
# records with a DOI and a non-empty abstract, keyed on the lower-cased DOI
# (DOIs are case-insensitive), and reduced to one row per DOI so the join
# cannot multiply rows. `na_matches = "never"` prevents references without a
# DOI from being matched to each other.
na_abstracts_with_scopus <- na_abstracts %>%
  mutate(doi_short = tolower(str_remove(doi, fixed("https://doi.org/")))) %>%
  left_join(
    references$scopus$raw %>%
      filter(!is.na(doi), !is.na(abstract), abstract != "") %>%
      mutate(doi = tolower(doi)) %>%
      distinct(doi, .keep_all = TRUE) %>%
      select(doi, abstract),
    by = join_by(doi_short == doi),
    na_matches = "never",
    relationship = "many-to-one"
  ) %>%
  mutate(ab = coalesce(ab, abstract)) %>%
  select(-abstract)
# Update the combined references with the new abstracts from Scopus.
# Only rows that actually received an abstract are used for the lookup, and
# at most one per ID, so the join keeps the number of rows unchanged.
# Existing abstracts are never overwritten; only missing ones are filled.
references$openalex$combined$raw_updated <- references$openalex$combined$raw %>%
  left_join(
    na_abstracts_with_scopus %>%
      select(id, ab) %>%
      filter(!is.na(ab)) %>%
      distinct(id, .keep_all = TRUE),
    by = "id",
    suffix = c("", "_updated")
  ) %>%
  mutate(ab = coalesce(ab, ab_updated)) %>%
  select(-ab_updated)
Create correct data
# Overview: summary statistics for every column of the updated data
references$openalex$combined$raw_updated %>%
  skimr::skim()
Name | Piped data |
Number of rows | 33079 |
Number of columns | 40 |
_______________________ | |
Column type frequency: | |
character | 24 |
list | 8 |
logical | 5 |
numeric | 3 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
id | 0 | 1.00 | 30 | 32 | 0 | 33079 | 0 |
title | 6 | 1.00 | 3 | 500 | 0 | 32287 | 0 |
display_name | 6 | 1.00 | 3 | 500 | 0 | 32287 | 0 |
ab | 1429 | 0.96 | 0 | 51771 | 11 | 30999 | 0 |
publication_date | 0 | 1.00 | 10 | 10 | 0 | 3052 | 0 |
so | 6193 | 0.81 | 1 | 244 | 0 | 8410 | 0 |
so_id | 6193 | 0.81 | 27 | 32 | 0 | 8436 | 0 |
host_organization | 13382 | 0.60 | 3 | 155 | 0 | 1308 | 0 |
issn_l | 10936 | 0.67 | 9 | 9 | 0 | 7690 | 0 |
url | 127 | 1.00 | 21 | 273 | 0 | 32942 | 0 |
pdf_url | 21052 | 0.36 | 29 | 359 | 0 | 11940 | 0 |
license | 22097 | 0.33 | 3 | 21 | 0 | 11 | 0 |
version | 14238 | 0.57 | 15 | 16 | 0 | 3 | 0 |
first_page | 13387 | 0.60 | 1 | 15 | 0 | 5079 | 0 |
last_page | 13495 | 0.59 | 1 | 15 | 0 | 5092 | 0 |
volume | 13788 | 0.58 | 1 | 25 | 0 | 684 | 0 |
issue | 17112 | 0.48 | 1 | 30 | 0 | 646 | 0 |
oa_status | 0 | 1.00 | 4 | 7 | 0 | 6 | 0 |
oa_url | 13547 | 0.59 | 20 | 359 | 0 | 19241 | 0 |
language | 2 | 1.00 | 2 | 2 | 0 | 8 | 0 |
cited_by_api_url | 0 | 1.00 | 53 | 55 | 0 | 33079 | 0 |
doi | 2024 | 0.94 | 26 | 96 | 0 | 31055 | 0 |
type | 0 | 1.00 | 6 | 12 | 0 | 6 | 0 |
mining_source | 0 | 1.00 | 16 | 18 | 0 | 2 | 0 |
Variable type: list
skim_variable | n_missing | complete_rate | n_unique | min_length | max_length |
---|---|---|---|---|---|
author | 253 | 0.99 | 32161 | 1 | 12 |
grants | 28778 | 0.13 | 2780 | 0 | 33 |
counts_by_year | 15151 | 0.54 | 5709 | 0 | 2 |
ids | 0 | 1.00 | 33079 | 1 | 5 |
referenced_works | 10682 | 0.68 | 22293 | 0 | 447 |
related_works | 403 | 0.99 | 23309 | 1 | 20 |
concepts | 0 | 1.00 | 33074 | 5 | 5 |
topics | 0 | 1.00 | 32528 | 0 | 5 |
Variable type: logical
skim_variable | n_missing | complete_rate | mean | count |
---|---|---|---|---|
is_oa | 124 | 1 | 0.53 | TRU: 17614, FAL: 15341 |
is_oa_anywhere | 0 | 1 | 0.59 | TRU: 19465, FAL: 13614 |
any_repository_has_fulltext | 0 | 1 | 0.29 | FAL: 23606, TRU: 9473 |
is_paratext | 0 | 1 | 0.00 | FAL: 33079 |
is_retracted | 0 | 1 | 0.00 | FAL: 33034, TRU: 45 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
relevance_score | 4559 | 0.86 | 9.91 | 26.91 | 0.04 | 0.84 | 3.13 | 7.26 | 986.22 | ▇▁▁▁▁ |
cited_by_count | 0 | 1.00 | 9.41 | 38.59 | 0.00 | 0.00 | 1.00 | 5.00 | 1878.00 | ▇▁▁▁▁ |
publication_year | 0 | 1.00 | 2021.81 | 2.58 | 1983.00 | 2021.00 | 2023.00 | 2024.00 | 2025.00 | ▁▁▁▁▇ |
# Frequency tables for publication type and language
references$openalex$combined$raw_updated %>%
  frq(type, language)
type <character>
# total N=33079 valid N=33079 mean=1.55 sd=1.38
Value | N | Raw % | Valid % | Cum. %
-----------------------------------------------
article | 28539 | 86.28 | 86.28 | 86.28
book-chapter | 40 | 0.12 | 0.12 | 86.40
editorial | 2 | 0.01 | 0.01 | 86.40
letter | 1 | 0.00 | 0.00 | 86.41
preprint | 4369 | 13.21 | 13.21 | 99.61
review | 128 | 0.39 | 0.39 | 100.00
<NA> | 0 | 0.00 | <NA> | <NA>
language <character>
# total N=33079 valid N=33077 mean=2.00 sd=0.06
Value | N | Raw % | Valid % | Cum. %
----------------------------------------
de | 1 | 0.00 | 0.00 | 0.00
en | 33052 | 99.92 | 99.92 | 99.93
es | 14 | 0.04 | 0.04 | 99.97
fr | 4 | 0.01 | 0.01 | 99.98
it | 3 | 0.01 | 0.01 | 99.99
nl | 1 | 0.00 | 0.00 | 99.99
pt | 1 | 0.00 | 0.00 | 100.00
sv | 1 | 0.00 | 0.00 | 100.00
<NA> | 2 | 0.01 | <NA> | <NA>
# Create the analysis data set: restrict to the selected publication types,
# English-language records (rows with a missing language are dropped by the
# comparison) and publication years from 2016 onwards, then add factor
# versions of year and type and clean the abstracts.
references$openalex$correct <- references$openalex$combined$raw_updated %>%
  filter(type %in% c("article", "conference-paper", "preprint")) %>%
  filter(language == "en") %>%
  filter(publication_year >= 2016) %>%
  mutate(
    # Create additional factor variables
    publication_year_fct = as.factor(publication_year),
    type_fct = as.factor(type),
    # Clean abstracts
    ab = ab %>%
      # Remove invalid U+FFFE characters
      str_replace_all("\ufffe", "") %>%
      # Remove every character except printable ASCII and newlines.
      # Note: this also strips accented letters and other non-ASCII text.
      str_replace_all("[^\x20-\x7E\n]", "") %>%
      # Ensure UTF-8 encoding; invalid byte sequences are dropped
      iconv(from = "UTF-8", to = "UTF-8", sub = "")
  )
Export data
# Save the cleaned analysis data set and the full reference object
qs::qsave(references$openalex$correct, file = here("local_data/references.qs"))
qs::qsave(references, file = here("local_data/references_full.qs"))

# Convert the cleaned data with oa2bibliometrix() and save the result
references_bibliometrix <- oa2bibliometrix(references$openalex$correct)
saveRDS(
  references_bibliometrix,
  file = here("local_data/references_import_bibliometrix.RDS")
)