I’m working with the Google Custom Search API in a Java Spring Boot application and have run into a limitation: the API supports at most 32 keywords per query. However, my application often needs to run searches that involve more than 32 keywords (sites, news sources, location, country, date restriction, etc.), and I want to ensure that all of the keywords are taken into account in the search results.
Problem:
The Google Custom Search API returns an error if more than 32 keywords are included in a single query. I need a way to break down larger keyword lists into smaller, API-compliant queries and then combine the results.
So far I have tried building the whole query as one plain string, as shown below:
import com.google.api.client.http.javanet.NetHttpTransport;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.customsearch.Customsearch;
import com.google.api.services.customsearch.model.Result;
import com.google.api.services.customsearch.model.Search;
import com.arun.googleSearch.Master.Group.domain.Group;
import com.arun.googleSearch.Master.Group.domain.GroupRepository;
import com.arun.googleSearch.Master.category.domain.Category;
import com.arun.googleSearch.Master.category.domain.CategoryRepository;
import com.arun.googleSearch.Master.googesearch.data.GoogleSearchResponseData;
import com.arun.googleSearch.Master.googesearch.data.GoogleSearchResultData;
import com.arun.googleSearch.Master.googesearch.request.CreateGoogleSearchRequest;
import com.ponsun.googleSearch.config.GoogleSearchApiConfig;
import com.arun.googleSearch.infrastructure.exceptions.googleSearch_ApplicationException;
import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@RequiredArgsConstructor
@Service
public class GoogleSearchApiServiceImpl implements GoogleSearchApiService {
private final GoogleSearchApiConfig googleSearchConfig;
private final CategoryRepository categoryRepository;
private final GroupRepository groupRepository;
@Override
@Transactional(readOnly = true)
public GoogleSearchResponseData googleSearchRequest(String q, CreateGoogleSearchRequest createGoogleSearchRequest) {
String apiKey = googleSearchConfig.getGoogleApiKey();
String cx = googleSearchConfig.getGoogleCx();
int reqTimeOut = googleSearchConfig.getHttpReqTimeOut();
long startIndex = Optional.ofNullable(createGoogleSearchRequest.getStartIndex()).orElse(1);
long perPage = Optional.ofNullable(createGoogleSearchRequest.getPerPage()).orElse(10);
String dateRange = createGoogleSearchRequest.getDateRestrict();
String siteSearch =createGoogleSearchRequest.getMedia().isEmpty() ? null : constructSites(createGoogleSearchRequest);
q= """ + q + """;
String qry = q + buildCompanyAndLocation(createGoogleSearchRequest) + siteSearch + buildCustomQuery(createGoogleSearchRequest) + finalQuery(createGoogleSearchRequest);
Customsearch customsearch = initializeCustomSearch(reqTimeOut);
GoogleSearchResponseData responseData = new GoogleSearchResponseData();
List<GoogleSearchResultData> googleSearchResultDataList = new ArrayList<>();
long totalResults = 0;
try {
String geoCode = createGoogleSearchRequest.getMedia().equals("news") ? createGoogleSearchRequest.getCountry() : "";
String cr = geoCode.isEmpty() ? null : "country" + geoCode;
String gl = geoCode.isEmpty() ? null : geoCode;
String includOrExclude = findIncludeOrExclude(createGoogleSearchRequest.getMedia());
long startT = System.currentTimeMillis();
Search result = executeSearch(customsearch, qry, apiKey, cx, startIndex, perPage, dateRange, gl, cr, includOrExclude);
long endT = System.currentTimeMillis();
long elapsedT = endT - startT;
System.out.println("Time taken to process the executeSearch: " + elapsedT + " milliseconds");
totalResults = result.getSearchInformation().getTotalResults();
long startTime = System.currentTimeMillis();
for (Result item : result.getItems()) {
GoogleSearchResultData googleSearchResultData = createGoogleSearchResultData(item);
googleSearchResultDataList.add(googleSearchResultData);
}
long endTime = System.currentTimeMillis();
long elapsedTime = endTime - startTime;
System.out.println("Time taken to process the googleSearchResultData: " + elapsedTime + " milliseconds");
responseData.setTotalSearchResults(totalResults);
responseData.setItems(googleSearchResultDataList);
} catch (Exception e) {
e.printStackTrace();
throw new googleSearch_ApplicationException(e.getMessage());
}
return responseData;
}
/**
 * Builds a {@code cse().list} request for the given query text, applies the
 * credentials, paging and filter parameters, and executes it.
 *
 * @param customsearch    client used to create the request
 * @param qry             full query text
 * @param apiKey          API key set on the request
 * @param cx              search engine id set on the request
 * @param startIndex      value passed to {@code setStart}
 * @param perPage         value passed to {@code setNum}
 * @param dateRestrict    value passed to {@code setDateRestrict}
 * @param gl              value passed to {@code setGl}
 * @param cr              value passed to {@code setCr}
 * @param includOrExclude value passed to {@code setSiteSearchFilter}
 * @return the response returned by {@code execute()}
 * @throws IOException if creating or executing the request fails
 */
private Search executeSearch(Customsearch customsearch, String qry, String apiKey, String cx, long startIndex, long perPage,
        String dateRestrict, String gl, String cr, String includOrExclude) throws IOException {
    System.out.println("=============>" + qry);

    Customsearch.Cse.List request = customsearch.cse().list(qry);

    // Credentials / engine selection.
    request.setKey(apiKey);
    request.setCx(cx);

    // Paging.
    request.setStart(startIndex);
    request.setNum(perPage);

    // Result filters.
    request.setDateRestrict(dateRestrict);
    request.setSiteSearchFilter(includOrExclude);
    request.setGl(gl);
    request.setCr(cr);

    return request.execute();
}
/**
 * Maps the media mode to the site-search filter code: {@code "e"} for
 * {@code "exclude"}, {@code "i"} for every other value.
 *
 * @param value the media mode; must not be null
 * @return {@code "e"} or {@code "i"}
 */
private String findIncludeOrExclude(String value) {
    // "exclude" is the only mode that does not map to "i".
    if (value.equals("exclude")) {
        return "e";
    }
    return "i";
}
/** Category group whose category names are used as news sites. */
private static final int NEWS_GROUP_ID = 3;

/** Category group whose category names are used as social-media sites. */
private static final int SOCIAL_MEDIA_GROUP_ID = 1;

/** Upper bound on the number of repository-provided sites put into one query. */
private static final int MAX_SITES_PER_QUERY = 15;

/**
 * Builds the site-restriction fragment of the query for the request's media mode.
 *
 * <ul>
 *   <li>{@code "news"}: {@code site:} alternatives for the news group, joined with OR</li>
 *   <li>{@code "include"}: {@code site:} alternatives for the social-media group, joined with OR</li>
 *   <li>{@code "exclude"}: one {@code -site:} term per social-media site</li>
 *   <li>{@code "sitesOnly"}: {@code site:} alternatives for the caller-supplied sites;
 *       entries without a dot get {@code .com} appended</li>
 *   <li>anything else (including null): empty string</li>
 * </ul>
 *
 * <p>The repository is only queried for the group the selected mode actually needs.
 *
 * @param createGoogleSearchRequest request carrying the media mode and optional site list
 * @return the fragment, starting with a space when non-empty, otherwise {@code ""}
 */
private String constructSites(CreateGoogleSearchRequest createGoogleSearchRequest) {
    String media = createGoogleSearchRequest.getMedia();
    if (media == null) {
        return "";
    }
    return switch (media) {
        case "news" -> orJoinedSiteFilter(cappedCategoryNames(NEWS_GROUP_ID));
        case "include" -> orJoinedSiteFilter(cappedCategoryNames(SOCIAL_MEDIA_GROUP_ID));
        // "-site:" excludes the whole domain; a bare "-name" would only exclude the word.
        case "exclude" -> cappedCategoryNames(SOCIAL_MEDIA_GROUP_ID).stream()
                .map(site -> " -site:" + site)
                .collect(Collectors.joining());
        case "sitesOnly" -> orJoinedSiteFilter(requestedSites(createGoogleSearchRequest));
        default -> "";
    };
}

/** Returns at most {@link #MAX_SITES_PER_QUERY} category names of the given group. */
private List<String> cappedCategoryNames(Integer groupId) {
    return categoryRepository.findByGroupId(groupId).stream()
            .map(Category::getName)
            .limit(MAX_SITES_PER_QUERY)
            .collect(Collectors.toList());
}

/** Returns the caller-supplied sites, appending ".com" to entries that contain no dot. */
private List<String> requestedSites(CreateGoogleSearchRequest createGoogleSearchRequest) {
    Collection<String> requested = createGoogleSearchRequest.getOnlyFromTheseSites();
    if (requested == null || requested.isEmpty()) {
        return List.of();
    }
    return requested.stream()
            .map(site -> site.contains(".") ? site : site + ".com")
            .collect(Collectors.toList());
}

/** Formats sites as {@code " site:a OR site:b"}; returns {@code ""} for an empty list. */
private String orJoinedSiteFilter(List<String> sites) {
    if (sites.isEmpty()) {
        return "";
    }
    return sites.stream()
            .map(site -> "site:" + site)
            .collect(Collectors.joining(" OR ", " ", ""));
}
/**
 * Creates a {@link Customsearch} client whose requests use {@code reqTimeOut}
 * as both connect and read timeout.
 *
 * @param reqTimeOut timeout value handed to {@code setConnectTimeout} and {@code setReadTimeout}
 *                   (presumably milliseconds; confirm against the configuration)
 * @return a new client instance
 * @throws RuntimeException wrapping any exception raised while constructing the client
 */
private Customsearch initializeCustomSearch(int reqTimeOut) {
    try {
        NetHttpTransport transport = new NetHttpTransport();
        JacksonFactory jsonFactory = new JacksonFactory();
        return new Customsearch(transport, jsonFactory, request -> {
            request.setReadTimeout(reqTimeOut);
            request.setConnectTimeout(reqTimeOut);
        });
    } catch (Exception failure) {
        failure.printStackTrace();
        throw new RuntimeException("Error initializing Customsearch", failure);
    }
}
/**
 * Converts one API result item into a {@link GoogleSearchResultData}.
 *
 * <p>The snippet is split into one {@code InnerResult} per line, the image URL is
 * taken from the item's pagemap when present, and a Google Translate link is built
 * with the item's link URL-encoded into the {@code u} parameter.
 *
 * @param item the result item returned by the search
 * @return the converted result
 */
private GoogleSearchResultData createGoogleSearchResultData(Result item) {
    String cseImageSrc = null;
    if (item.getPagemap() != null) {
        cseImageSrc = extractCseImageSrc(item.getPagemap());
    }

    // Encode the target URL: an unencoded '&' or '#' in the link would truncate
    // or corrupt the "u" parameter of the translate URL.
    String link = item.getLink();
    String encodedLink = link == null ? "" : URLEncoder.encode(link, StandardCharsets.UTF_8);
    String translateLink = "https://translate.google.com/translate?hl=&sl=auto&tl=en&u=" + encodedLink;

    List<GoogleSearchResultData.InnerResult> details = new ArrayList<>();
    if (item.getSnippet() != null) {
        // Split on line breaks (the previous pattern "n" split on every letter 'n').
        for (String paragraph : item.getSnippet().split("\n")) {
            GoogleSearchResultData.InnerResult innerResult = new GoogleSearchResultData.InnerResult();
            innerResult.setParagraphs(paragraph);
            details.add(innerResult);
        }
    }

    System.out.println(item.getTitle() + " - " + item.getLink());
    return new GoogleSearchResultData(item.getTitle(), item.getLink(), item.getDisplayLink(), cseImageSrc, translateLink, details);
}
/**
 * Reads the {@code src} of the first {@code cse_image} entry in a result's pagemap.
 *
 * <p>The lookup is defensive about the runtime shape of the map: it returns
 * {@code null} instead of throwing when the entry is missing, is not a list,
 * is empty, has a first element that is not a map, or has a non-string {@code src}.
 *
 * @param pagemap the pagemap of a result item; may be null
 * @return the image URL, or {@code null} when none is available
 */
private String extractCseImageSrc(Map<String, List<Map<String, Object>>> pagemap) {
    if (pagemap == null) {
        return null;
    }
    // Held as Object on purpose: the declared generic type is not enforced at runtime,
    // so each level is checked before use.
    Object cseImage = pagemap.get("cse_image");
    if (!(cseImage instanceof List<?> images) || images.isEmpty()) {
        return null;
    }
    if (!(images.get(0) instanceof Map<?, ?> firstImage)) {
        return null;
    }
    return firstImage.get("src") instanceof String src ? src : null;
}
/**
 * Builds the {@code location:} / {@code company:} fragment of the query.
 * Each part is emitted only when the corresponding request value is non-null
 * and non-empty; location comes first.
 *
 * @param createGoogleSearchRequest request carrying the optional location and company
 * @return the fragment (each part prefixed with a space), or {@code ""} when neither is set
 */
private String buildCompanyAndLocation(CreateGoogleSearchRequest createGoogleSearchRequest) {
    StringBuilder fragment = new StringBuilder();

    boolean hasLocation = createGoogleSearchRequest.getLocation() != null
            && !createGoogleSearchRequest.getLocation().isEmpty();
    if (hasLocation) {
        fragment.append(" location:");
        fragment.append(createGoogleSearchRequest.getLocation());
    }

    boolean hasCompany = createGoogleSearchRequest.getCompany() != null
            && !createGoogleSearchRequest.getCompany().isEmpty();
    if (hasCompany) {
        fragment.append(" company:");
        fragment.append(createGoogleSearchRequest.getCompany());
    }

    return fragment.toString();
}
private String buildCustomQuery(CreateGoogleSearchRequest createGoogleSearchRequest) {
long startTime = System.currentTimeMillis();
Map<Boolean, List<Integer>> groupedIds = createGoogleSearchRequest.getGroupIds().stream()
.flatMap(gid -> groupRepository.findById(gid.getGroupId()).stream())
.collect(Collectors.partitioningBy(Group::getIsOffence, Collectors.mapping(Group::getId, Collectors.toList())));
List<Integer> offensiveGroupIds = groupedIds.get(true);
StringBuilder searchQuery = new StringBuilder();
//exclude these sites
List<String> excludedSites = new ArrayList<>();
if (!createGoogleSearchRequest.getExcludeTheseSites().isEmpty()) {
for (String site : createGoogleSearchRequest.getExcludeTheseSites()) {
if (!site.contains(".")) {
site += ".com";
}
excludedSites.add(" -" + site);
}
}
if (!excludedSites.isEmpty()) {
searchQuery.append(String.join("", excludedSites));
}
// predicate offence
List<String> offensiveCategories = offensiveGroupIds.stream()
.flatMap(gid -> categoryRepository.findByGroupId(gid).stream()
.map(Category::getName)
.findFirst()
.stream()
)
.toList();
if (!offensiveCategories.isEmpty()) {
searchQuery.append(" (").append(
offensiveCategories.stream()
.map(category -> category.contains(" ") ? """ + category + """ : category)
.collect(Collectors.joining(" OR "))
).append(")");
}
// keywords
List<String> keyWords = createGoogleSearchRequest.getKeywords();
if(keyWords.size() > 5){
keyWords = keyWords.subList(0, 5);
}
if(!keyWords.isEmpty()){
searchQuery.append(" (").append(
createGoogleSearchRequest.getKeywords().stream()
.map(keyWord -> keyWord.contains(" ") ? """ + keyWord + """ : keyWord)
.collect(Collectors.joining(" OR "))
).append(")");
}
long endTime = System.currentTimeMillis();
long elapsedTime = endTime - startTime;
System.out.println("Time taken to process the buildCustomQuery: " + elapsedTime + " milliseconds");
return searchQuery.toString();
}
/**
 * Builds the trailing date/sort fragment of the query.
 *
 * <p>Appends {@code after:yyyy-MM-dd} and/or {@code before:yyyy-MM-dd} for the dates
 * present on the request. Only when neither date is set and the request's sort value
 * equals {@code "date"} does it append {@code sort=date}.
 *
 * <p>NOTE(review): {@code sort=date} is appended to the query text here; confirm whether
 * it should instead be sent as the API's separate sort parameter.
 *
 * @param createGoogleSearchRequest request carrying the optional dates and sort value
 * @return the fragment, each part prefixed with a space; {@code ""} when nothing applies
 */
private String finalQuery(CreateGoogleSearchRequest createGoogleSearchRequest) {
    SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
    var afterDate = createGoogleSearchRequest.getAfterDate();
    var beforeDate = createGoogleSearchRequest.getBeforeDate();

    StringBuilder fragment = new StringBuilder();
    if (afterDate != null) {
        fragment.append(" after:").append(dayFormat.format(afterDate));
    }
    if (beforeDate != null) {
        fragment.append(" before:").append(dayFormat.format(beforeDate));
    }

    // Sorting is only requested when no date bound is given.
    boolean unbounded = afterDate == null && beforeDate == null;
    if (unbounded && Objects.equals(createGoogleSearchRequest.getSort(), "date")) {
        fragment.append(" sort=").append(createGoogleSearchRequest.getSort());
    }
    return fragment.toString();
}
}