I have a Spring Cloud Function running on AWS Lambda that encounters a connection timeout when trying to parse a website with JSoup. This connection timeout happens only on cold starts.
This function is triggered in prod by an EventBridge rule.
The website is parsed/scraped correctly when the function is warmed up, but after 1.5 to 2 hours of inactivity the function goes cold, and the next attempt to parse the website results in a connection timeout.
When warmed up, the function usually completes within 8 to 10 seconds successfully. The lambda function has a timeout of 1 minute.
Currently, I’m using AWS Lambda without any VPC, as the function doesn’t access any resources in my account.
I have verified the internet connection before using JSoup as you can see in my code.
Also, I can test the function using the “test” option of the console and everything works normally.
The URL is redacted in the logs for privacy.
What could be causing this issue?
<code>2024-08-19T18:05:56.418Z
Trying to connect to https://pra.uf**/ (attempt 1)
Failed to connect to https://pra.uf**/ on attempt 1: Connection timed out
Trying to connect to https://pra.uf**/ (attempt 2)
Failed to connect to https://pra.uf**/ on attempt 2: Connection timed out
Trying to connect to https://pra.uf**/ (attempt 3)
Failed to connect to https://pra.uf**/ on attempt 3: Connection timed out
Trying to connect to https://pra.uf**/ (attempt 4)
2024-08-19T18:06:55.461Z 7bcf94b5-6b47-42d2-815d-46cf3dd4ff7a Task timed out after 60.08 seconds
<code>2024-08-19T18:05:56.418Z
Internet is available
2024-08-19T18:05:56.419Z
Trying to connect to https://pra.uf**/ (attempt 1)
2024-08-19T18:06:12.712Z
Failed to connect to https://pra.uf**/ on attempt 1: Connection timed out
2024-08-19T18:06:13.712Z
Trying to connect to https://pra.uf**/ (attempt 2)
2024-08-19T18:06:29.094Z
Failed to connect to https://pra.uf**/ on attempt 2: Connection timed out
2024-08-19T18:06:30.094Z
Trying to connect to https://pra.uf**/ (attempt 3)
2024-08-19T18:06:45.478Z
Failed to connect to https://pra.uf**/ on attempt 3: Connection timed out
2024-08-19T18:06:46.478Z
Trying to connect to https://pra.uf**/ (attempt 4)
2024-08-19T18:06:55.461Z 7bcf94b5-6b47-42d2-815d-46cf3dd4ff7a Task timed out after 60.08 seconds
</code>
2024-08-19T18:05:56.418Z
Internet is available
2024-08-19T18:05:56.419Z
Trying to connect to https://pra.uf**/ (attempt 1)
2024-08-19T18:06:12.712Z
Failed to connect to https://pra.uf**/ on attempt 1: Connection timed out
2024-08-19T18:06:13.712Z
Trying to connect to https://pra.uf**/ (attempt 2)
2024-08-19T18:06:29.094Z
Failed to connect to https://pra.uf**/ on attempt 2: Connection timed out
2024-08-19T18:06:30.094Z
Trying to connect to https://pra.uf**/ (attempt 3)
2024-08-19T18:06:45.478Z
Failed to connect to https://pra.uf**/ on attempt 3: Connection timed out
2024-08-19T18:06:46.478Z
Trying to connect to https://pra.uf**/ (attempt 4)
2024-08-19T18:06:55.461Z 7bcf94b5-6b47-42d2-815d-46cf3dd4ff7a Task timed out after 60.08 seconds
<code>public boolean isInternetAvailable() {
// NOTE(review): this snippet appears truncated by the page extraction —
// the opening `try {`, the `return` statements, and the closing braces
// are missing, so it does not compile as shown. See the complete version
// of this method below.
final URL url = new URL("https://www.google.com");
final URLConnection conn = url.openConnection();
conn.getInputStream().close();
} catch (IOException e) {
<code>/**
 * Probes outbound connectivity by opening a connection to
 * https://www.google.com and reading from it.
 *
 * @return {@code true} if the probe succeeds, {@code false} on any
 *         {@link IOException} (DNS failure, connect/read timeout, TLS error)
 */
public boolean isInternetAvailable() {
    try {
        final URL url = new URL("https://www.google.com");
        final URLConnection conn = url.openConnection();
        // URLConnection's default timeouts are 0 (wait forever): a stalled
        // cold-start network path would hang this probe until the Lambda
        // itself times out. Bound both phases explicitly.
        conn.setConnectTimeout(5_000);
        conn.setReadTimeout(5_000);
        conn.connect();
        conn.getInputStream().close();
        return true;
    } catch (IOException e) {
        return false;
    }
}
</code>
/**
 * Checks outbound internet connectivity by opening a connection to
 * https://www.google.com and reading from it.
 *
 * @return {@code true} if the connection succeeds, {@code false} on any
 *         {@link IOException} (DNS failure, connect/read timeout, TLS error)
 */
public boolean isInternetAvailable() {
    try {
        final URL url = new URL("https://www.google.com");
        final URLConnection conn = url.openConnection();
        // Default timeouts are 0 (infinite); without these a stalled network
        // path can consume the entire Lambda invocation budget.
        conn.setConnectTimeout(5_000);
        conn.setReadTimeout(5_000);
        conn.connect();
        conn.getInputStream().close();
        return true;
    } catch (IOException e) {
        // Any I/O failure is treated as "offline".
        return false;
    }
}
// NOTE(review): this listing is a truncated duplicate of the full ScraperRU
// class below — the page extraction dropped the `try {` line, several
// closing braces, and other statements, so it does not compile as shown.
public class ScraperRU implements IScraperRU {
// NOTE(review): worst case is MAX_RETRIES * 35s + 3 * 1s ~= 143s, far past
// the 60s Lambda timeout the question describes — the function is killed
// mid-retry (see the logs above, where attempt 4 never completes).
private static final int TIMEOUT_CONNECTION = 35000; // 35 seconds
private static final int RETRY_DELAY = 1000; // 1 second
private static final int MAX_RETRIES = 4;
private final Utils utils;
private final ScraperHelper scraperHelper;
private final String ruUrl;
public ScraperRU(Utils utils, ScraperHelper scraperHelper, @Value("${RU_URL}") String ruUrl) {
this.scraperHelper = scraperHelper;
public Document connectScraper(String webURL) throws InterruptedException {
if (!utils.isInternetAvailable()) {
throw new RuntimeException("No internet connection available");
System.out.println("Internet is available");
while (attempt < MAX_RETRIES) {
System.out.println("Trying to connect to " + webURL + " (attempt " + attempt + ")");
Connection.Response response = Jsoup.connect(webURL).timeout(TIMEOUT_CONNECTION)
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
System.out.println("HTTP Status Code: " + response.statusCode())
System.out.println("HTTP Status Message: " + response.statusMessage());
System.out.println("Response Headers: " + response.headers());
if (response.statusCode() == 200) {
System.out.println("Unexpected HTTP status code: " + response.statusCode());
throw new RuntimeException("Failed to retrieve content from the website due to unexpected HTTP " + "status code: " + response.statusCode());
} catch (IOException e) {
System.out.println("Failed to connect to " + webURL + " on attempt " + attempt + ": " + e.getMessage());
if (attempt >= MAX_RETRIES) {
throw new RuntimeException("Failed to retrieve content from the website after " + MAX_RETRIES + " attempts");
Thread.sleep(RETRY_DELAY);
throw new RuntimeException("Failed to retrieve content from the website after " + MAX_RETRIES + " attempts");
<code>@Component
public class ScraperRU implements IScraperRU {
// Per-attempt Jsoup connect/read timeout.
// NOTE(review): worst case is MAX_RETRIES * 35s + 3 * RETRY_DELAY ~= 143s,
// far past the Lambda's 60s timeout — the logs show the function being
// killed during attempt 4.
private static final int TIMEOUT_CONNECTION = 35000; // 35 seconds
private static final int RETRY_DELAY = 1000; // 1 second
private static final int MAX_RETRIES = 4;
private final Utils utils;
private final ScraperHelper scraperHelper;
// Target URL, injected from the RU_URL property.
private final String ruUrl;
public ScraperRU(Utils utils, ScraperHelper scraperHelper, @Value("${RU_URL}") String ruUrl) {
this.scraperHelper = scraperHelper;
this.utils = utils;
this.ruUrl = ruUrl;
}
// Fetches webURL with Jsoup, retrying up to MAX_RETRIES times on
// IOException, and returns the parsed Document on HTTP 200.
// Throws RuntimeException when there is no connectivity, on a non-200
// status (note: that RuntimeException is NOT caught by the IOException
// handler below, so a non-200 aborts immediately with no retry), or after
// all retries are exhausted. Propagates InterruptedException from
// Thread.sleep.
public Document connectScraper(String webURL) throws InterruptedException {
if (!utils.isInternetAvailable()) {
throw new RuntimeException("No internet connection available");
}
int attempt = 0;
System.out.println("Internet is available");
while (attempt < MAX_RETRIES) {
try {
attempt++;
System.out.println("Trying to connect to " + webURL + " (attempt " + attempt + ")");
Connection.Response response = Jsoup.connect(webURL).timeout(TIMEOUT_CONNECTION)
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
.execute();
System.out.println("HTTP Status Code: " + response.statusCode());
System.out.println("HTTP Status Message: " + response.statusMessage());
System.out.println("Response Headers: " + response.headers());
if (response.statusCode() == 200) {
return response.parse();
} else {
System.out.println("Unexpected HTTP status code: " + response.statusCode());
throw new RuntimeException("Failed to retrieve content from the website due to unexpected HTTP " + "status code: " + response.statusCode());
}
} catch (IOException e) {
System.out.println("Failed to connect to " + webURL + " on attempt " + attempt + ": " + e.getMessage());
if (attempt >= MAX_RETRIES) {
throw new RuntimeException("Failed to retrieve content from the website after " + MAX_RETRIES + " attempts");
}
Thread.sleep(RETRY_DELAY);
}
}
throw new RuntimeException("Failed to retrieve content from the website after " + MAX_RETRIES + " attempts");
}
// NOTE(review): the class's closing brace is missing from this listing.
</code>
@Component
public class ScraperRU implements IScraperRU {
private static final int TIMEOUT_CONNECTION = 35000; // 35 seconds
private static final int RETRY_DELAY = 1000; // 1 second
private static final int MAX_RETRIES = 4;
private final Utils utils;
private final ScraperHelper scraperHelper;
private final String ruUrl;
public ScraperRU(Utils utils, ScraperHelper scraperHelper, @Value("${RU_URL}") String ruUrl) {
this.scraperHelper = scraperHelper;
this.utils = utils;
this.ruUrl = ruUrl;
}
public Document connectScraper(String webURL) throws InterruptedException {
if (!utils.isInternetAvailable()) {
throw new RuntimeException("No internet connection available");
}
int attempt = 0;
System.out.println("Internet is available");
while (attempt < MAX_RETRIES) {
try {
attempt++;
System.out.println("Trying to connect to " + webURL + " (attempt " + attempt + ")");
Connection.Response response = Jsoup.connect(webURL).timeout(TIMEOUT_CONNECTION)
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
.execute();
System.out.println("HTTP Status Code: " + response.statusCode());
System.out.println("HTTP Status Message: " + response.statusMessage());
System.out.println("Response Headers: " + response.headers());
if (response.statusCode() == 200) {
return response.parse();
} else {
System.out.println("Unexpected HTTP status code: " + response.statusCode());
throw new RuntimeException("Failed to retrieve content from the website due to unexpected HTTP " + "status code: " + response.statusCode());
}
} catch (IOException e) {
System.out.println("Failed to connect to " + webURL + " on attempt " + attempt + ": " + e.getMessage());
if (attempt >= MAX_RETRIES) {
throw new RuntimeException("Failed to retrieve content from the website after " + MAX_RETRIES + " attempts");
}
Thread.sleep(RETRY_DELAY);
}
}
throw new RuntimeException("Failed to retrieve content from the website after " + MAX_RETRIES + " attempts");
}