I am trying to do something similar to what is described in these questions:
New ID column depending on another column in R
creating an Identifier ID column based on names.
Now I’m asking myself whether there is a way to account for similarities between names. (In my case, I read my data via pdftools, so it is likely that single letters differ between entries that refer to the same name, but I still want to create a unique ID for them.)
For example, cases like “e.k. maswabi” and “e.l. maswabi” should be treated as the same name and receive the same ID.
dput(finish1[10:15,2:length(finish1)])
structure(list(name.adress = c("mrs v.s. makwa, p.o. box 75, kanye. ",
"n. tsele, p.o. box 8, mochudi.",
"h.m. gaorutwe, modern restaura p.o. box 67, molepolole.",
"g.m. kwerepe, p.o. box 92, maun.", "s. rakgathi, p.o. box 125, mochudi.",
"j. sedio, p.o. box 376, kanye."),
name = c("mrs v.s. makwa", "n. tsele", "h.m. gaorutwe", "g.m. kwerepe",
"s. rakgathi", "j. sedio"), pobox = c(" 75", " 8", " 67",
" 92", " 125", " 376"), living = c("kanye", "mochudi", "molepolole",
"maun", "mochudi", "kanye"), has_telephone = c(FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE), licence = c("restaurant , small , general , trading",
"small , general , trading", "small , general , restaurant , trading",
"small , general , trading", "restaurant", "restaurant"),
is_a_Dealer = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
communication = c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
), buisness = c("kanye", NA, "molepolole", NA, "dikgonnye",
NA), council = c("southern district council ",
"kgatleng district council ", "kweneng district council ",
"north west district council ", "kgatleng district council ",
"southern district council "), date = c("25. 2.77",
"25. 2.77", "17. 2.77", "7. 2.77", "25. 2.77", "25. 2.77"
), full_string = c("mrs v.s. makwa, restaurant and kanye, main reef southern district 25. 2.77§§-§§p.o. box 75, small general road council§§-§§kanye. trading",
"n. tsele, small general dikalakaneng kgatleng district 25. 2.77§§-§§p.o. box 8, trading council§§-§§mochudi.",
"h.m. gaorutwe, small general mokgalong ward, kweneng district 17. 2.77§§-§§modern restaurant, trading molepolole council§§-§§p.o. box 67,§§-§§molepolole.",
"g.m. kwerepe, small general titloyamokole north west district 7. 2.77§§-§§p.o. box 92, trading council§§-§§maun.",
"s. rakgathi, restaurant dikgonnye kgatleng district 25. 2.77§§-§§p.o. box 125, council§§-§§mochudi.",
"j. sedio, restaurant pltseng southern district 25. 2.77§§-§§p.o. box 376, council§§-§§kanye."
), buisness_by_location = c("t and kanye, main reef s l road c g",
"l dikalakaneng k g c ",
"l mokgalong ward, k g molepolole c ",
"l titloyamokole n g c ",
"t dikgonnye k c ",
"t pltseng s c "
), caseID = 10:15, year = c(1977, 1977, 1977, 1977, 1977,
1977)), row.names = c(NA, -6L), class = c("tbl_df", "tbl",
"data.frame"))