I’m trying to create a Lambda function on AWS that processes some images, but it fails with an error every time it’s invoked. The code is in the two .js files below (handler.js and images.js).
handler.js:
<code>'use strict'
const request = require('request')
const urlParser = require('url')
const URLSearchParams = require('url').URLSearchParams
const shortid = require('shortid')
const asnc = require('async')
const AWS = require('aws-sdk')
const s3 = new AWS.S3()
const sqs = new AWS.SQS({region: process.env.REGION})
const images = require('./images')()
function writeStatus (url, domain, results) {
  // Rewrite the crawled URL so it points at the unique crawl domain, then
  // persist a <domain>/status.json document to S3 describing the download.
  // Resolves (never rejects) with {stat: 'ok'} on success or {stat: <err>}
  // on failure, so callers can chain without a catch.
  const parsed = urlParser.parse(url)
  parsed.hostname = domain
  parsed.host = domain

  const statFile = {
    url: urlParser.format(parsed),
    stat: 'downloaded',
    downloadResults: results
  }

  return new Promise((resolve) => {
    const body = Buffer.from(JSON.stringify(statFile, null, 2), 'utf8')
    const key = domain + '/status.json'
    s3.putObject({Bucket: process.env.BUCKET, Key: key, Body: body}, (err, data) => {
      resolve({stat: err || 'ok'})
    })
  })
}
function createUniqueDomain (url) {
  // Derive a unique, lowercase pseudo-domain for this crawl: the `q=`
  // search term when present, otherwise a generated short id, prefixed
  // onto the original hostname. Spaces are stripped so the result is a
  // valid S3 key segment.
  const parsed = urlParser.parse(url)
  const params = new URLSearchParams(parsed.search)
  const q = params.get('q')
  // shortid is only consulted when no usable search term exists.
  const prefix = q || shortid.generate()
  const domain = (prefix + '.' + parsed.hostname).replace(/ /g, '')
  return domain.toLowerCase()
}
function crawl (domain, url, context) {
  // Download the page at `url`, extract and fetch its images into S3 under
  // `domain`, then write the status file. Always resolves (never rejects)
  // with an HTTP-style {statusCode, body} result. `context` is unused but
  // kept for signature parity with queueAnalysis.
  console.log('crawling: ' + url)
  return new Promise(resolve => {
    request(url, (err, response, body) => {
      // When `err` is set, `response` is undefined — the short-circuit on
      // `err ||` is what keeps `response.statusCode` safe here.
      if (err || response.statusCode !== 200) { return resolve({statusCode: 500, body: err}) }
      images.parseImageUrls(body, url)
        .then(urls => images.fetchImages(urls, domain))
        .then(results => writeStatus(url, domain, results))
        .then(result => resolve({statusCode: 200, body: JSON.stringify(result)}))
        .catch(e => {
          // Fix: without this catch, any rejection in the chain left the
          // promise pending forever and the Lambda hung until timeout.
          console.log('crawl error: ' + e)
          resolve({statusCode: 500, body: '' + e})
        })
    })
  })
}
function queueAnalysis (domain, url, context) {
  // Post an 'analyze' message for `domain` onto the analysis SQS queue.
  // Resolves (never rejects) with an HTTP-style {statusCode, body} result.
  // `url` is accepted but unused; kept for signature parity with crawl.
  // Prefer the configured account id, else take it from the function ARN
  // (arn:aws:lambda:region:ACCOUNT:... — field 4).
  const accountId = process.env.ACCOUNTID || context.invokedFunctionArn.split(':')[4]
  const queueUrl = `https://sqs.${process.env.REGION}.amazonaws.com/${accountId}/${process.env.ANALYSIS_QUEUE}`
  const params = {
    MessageBody: JSON.stringify({action: 'analyze', msg: {domain: domain}}),
    QueueUrl: queueUrl
  }
  return new Promise(resolve => {
    sqs.sendMessage(params, (err, data) => {
      if (err) {
        console.log('QUEUE ERROR: ' + err)
        return resolve({statusCode: 500, body: err})
      }
      console.log('queued analysis: ' + queueUrl)
      resolve({statusCode: 200, body: {queue: queueUrl, msgId: data.MessageId}})
    })
  })
}
module.exports.crawlImages = function (event, context, cb) {
  // Lambda entry point for SQS-triggered crawls. Each record body is JSON
  // of the form {action: 'download', msg: {url: ...}}; records are
  // processed one at a time via eachSeries.
  asnc.eachSeries(event.Records, (record, asnCb) => {
    let { body } = record
    try {
      body = JSON.parse(body)
    } catch (exp) {
      return asnCb('message parse error: ' + record)
    }
    if (body.action === 'download' && body.msg && body.msg.url) {
      const udomain = createUniqueDomain(body.msg.url)
      // The crawl result is intentionally discarded; only the analysis
      // queue result is reported back.
      crawl(udomain, body.msg.url, context)
        .then(() => queueAnalysis(udomain, body.msg.url, context))
        .then(result => asnCb(null, result))
        // Fix: surface unexpected rejections/throws (e.g. a missing
        // invokedFunctionArn) instead of never calling asnCb and stalling
        // the series until the Lambda times out.
        .catch(err => asnCb(err))
    } else {
      asnCb('malformed message')
    }
  }, (err) => {
    // Errors are logged but not passed to cb, so SQS will not redeliver
    // the batch; pass `err` to cb instead if retries are desired.
    if (err) { console.log(err) }
    cb()
  })
}
</code>
<code>'use strict'
const request = require('request')
const urlParser = require('url')
const URLSearchParams = require('url').URLSearchParams
const shortid = require('shortid')
const asnc = require('async')
const AWS = require('aws-sdk')
const s3 = new AWS.S3()
const sqs = new AWS.SQS({region: process.env.REGION})
const images = require('./images')()
// NOTE(review): this listing is an exact duplicate of the handler.js code
// pasted earlier in the post — consider removing one copy from the question.
// Writes <domain>/status.json to S3 describing the download results.
// Resolves (never rejects) with {stat: 'ok'} or {stat: <error>}.
function writeStatus (url, domain, results) {
let parsed = urlParser.parse(url)
// Point the recorded URL at the unique crawl domain.
parsed.hostname = domain
parsed.host = domain
const statFile = {
url: urlParser.format(parsed),
stat: 'downloaded',
downloadResults: results
}
return new Promise((resolve) => {
s3.putObject({Bucket: process.env.BUCKET, Key: domain + '/status.json', Body: Buffer.from(JSON.stringify(statFile, null, 2), 'utf8')}, (err, data) => {
resolve({stat: err || 'ok'})
})
})
}
// Builds a unique lowercase pseudo-domain: the q= search term when present,
// otherwise a generated short id, prefixed onto the original hostname.
function createUniqueDomain (url) {
const parsed = urlParser.parse(url)
const sp = new URLSearchParams(parsed.search)
let domain
if (sp.get('q')) {
domain = sp.get('q') + '.' + parsed.hostname
} else {
domain = shortid.generate() + '.' + parsed.hostname
}
// Strip spaces so the result is a valid S3 key segment.
domain = domain.replace(/ /g, '')
return domain.toLowerCase()
}
// Downloads the page, fetches its images into S3 under `domain`, then writes
// the status file. Resolves with an HTTP-style {statusCode, body} result.
// NOTE(review): a rejection inside the inner promise chain would leave this
// promise pending forever — consider adding a .catch.
function crawl (domain, url, context) {
console.log('crawling: ' + url)
return new Promise(resolve => {
request(url, (err, response, body) => {
if (err || response.statusCode !== 200) { return resolve({statusCode: 500, body: err}) }
images.parseImageUrls(body, url).then(urls => {
images.fetchImages(urls, domain).then(results => {
writeStatus(url, domain, results).then(result => {
resolve({statusCode: 200, body: JSON.stringify(result)})
})
})
})
})
})
}
// Posts an 'analyze' message for `domain` to the analysis SQS queue.
// `url` is accepted but unused. Resolves (never rejects).
function queueAnalysis (domain, url, context) {
let accountId = process.env.ACCOUNTID
if (!accountId) {
// Fall back to the account id embedded in the invoked function ARN.
accountId = context.invokedFunctionArn.split(':')[4]
}
let queueUrl = `https://sqs.${process.env.REGION}.amazonaws.com/${accountId}/${process.env.ANALYSIS_QUEUE}`
let params = {
MessageBody: JSON.stringify({action: 'analyze', msg: {domain: domain}}),
QueueUrl: queueUrl
}
return new Promise(resolve => {
sqs.sendMessage(params, (err, data) => {
if (err) { console.log('QUEUE ERROR: ' + err); return resolve({statusCode: 500, body: err}) }
console.log('queued analysis: ' + queueUrl)
resolve({statusCode: 200, body: {queue: queueUrl, msgId: data.MessageId}})
})
})
}
// Lambda entry point for SQS 'download' messages: crawls each record's URL
// in series and queues follow-up analysis.
module.exports.crawlImages = function (event, context, cb) {
asnc.eachSeries(event.Records, (record, asnCb) => {
let { body } = record
try {
body = JSON.parse(body)
} catch (exp) {
return asnCb('message parse error: ' + record)
}
if (body.action === 'download' && body.msg && body.msg.url) {
const udomain = createUniqueDomain(body.msg.url)
crawl(udomain, body.msg.url, context).then(result => {
queueAnalysis(udomain, body.msg.url, context).then(result => {
asnCb(null, result)
})
})
} else {
asnCb('malformed message')
}
}, (err) => {
// Errors are logged but not returned, so failed messages are not retried.
if (err) { console.log(err) }
cb()
})
}
</code>
'use strict'
const request = require('request')
const urlParser = require('url')
const URLSearchParams = require('url').URLSearchParams
const shortid = require('shortid')
const asnc = require('async')
const AWS = require('aws-sdk')
const s3 = new AWS.S3()
const sqs = new AWS.SQS({region: process.env.REGION})
const images = require('./images')()
// NOTE(review): this listing is an exact duplicate of the handler.js code
// pasted earlier in the post — consider removing one copy from the question.
// Writes <domain>/status.json to S3 describing the download results.
// Resolves (never rejects) with {stat: 'ok'} or {stat: <error>}.
function writeStatus (url, domain, results) {
let parsed = urlParser.parse(url)
// Point the recorded URL at the unique crawl domain.
parsed.hostname = domain
parsed.host = domain
const statFile = {
url: urlParser.format(parsed),
stat: 'downloaded',
downloadResults: results
}
return new Promise((resolve) => {
s3.putObject({Bucket: process.env.BUCKET, Key: domain + '/status.json', Body: Buffer.from(JSON.stringify(statFile, null, 2), 'utf8')}, (err, data) => {
resolve({stat: err || 'ok'})
})
})
}
// Builds a unique lowercase pseudo-domain: the q= search term when present,
// otherwise a generated short id, prefixed onto the original hostname.
function createUniqueDomain (url) {
const parsed = urlParser.parse(url)
const sp = new URLSearchParams(parsed.search)
let domain
if (sp.get('q')) {
domain = sp.get('q') + '.' + parsed.hostname
} else {
domain = shortid.generate() + '.' + parsed.hostname
}
// Strip spaces so the result is a valid S3 key segment.
domain = domain.replace(/ /g, '')
return domain.toLowerCase()
}
// Downloads the page, fetches its images into S3 under `domain`, then writes
// the status file. Resolves with an HTTP-style {statusCode, body} result.
// NOTE(review): a rejection inside the inner promise chain would leave this
// promise pending forever — consider adding a .catch.
function crawl (domain, url, context) {
console.log('crawling: ' + url)
return new Promise(resolve => {
request(url, (err, response, body) => {
if (err || response.statusCode !== 200) { return resolve({statusCode: 500, body: err}) }
images.parseImageUrls(body, url).then(urls => {
images.fetchImages(urls, domain).then(results => {
writeStatus(url, domain, results).then(result => {
resolve({statusCode: 200, body: JSON.stringify(result)})
})
})
})
})
})
}
// Posts an 'analyze' message for `domain` to the analysis SQS queue.
// `url` is accepted but unused. Resolves (never rejects).
function queueAnalysis (domain, url, context) {
let accountId = process.env.ACCOUNTID
if (!accountId) {
// Fall back to the account id embedded in the invoked function ARN.
accountId = context.invokedFunctionArn.split(':')[4]
}
let queueUrl = `https://sqs.${process.env.REGION}.amazonaws.com/${accountId}/${process.env.ANALYSIS_QUEUE}`
let params = {
MessageBody: JSON.stringify({action: 'analyze', msg: {domain: domain}}),
QueueUrl: queueUrl
}
return new Promise(resolve => {
sqs.sendMessage(params, (err, data) => {
if (err) { console.log('QUEUE ERROR: ' + err); return resolve({statusCode: 500, body: err}) }
console.log('queued analysis: ' + queueUrl)
resolve({statusCode: 200, body: {queue: queueUrl, msgId: data.MessageId}})
})
})
}
// Lambda entry point for SQS 'download' messages: crawls each record's URL
// in series and queues follow-up analysis.
module.exports.crawlImages = function (event, context, cb) {
asnc.eachSeries(event.Records, (record, asnCb) => {
let { body } = record
try {
body = JSON.parse(body)
} catch (exp) {
return asnCb('message parse error: ' + record)
}
if (body.action === 'download' && body.msg && body.msg.url) {
const udomain = createUniqueDomain(body.msg.url)
crawl(udomain, body.msg.url, context).then(result => {
queueAnalysis(udomain, body.msg.url, context).then(result => {
asnCb(null, result)
})
})
} else {
asnCb('malformed message')
}
}, (err) => {
// Errors are logged but not returned, so failed messages are not retried.
if (err) { console.log(err) }
cb()
})
}
images.js:
<code>'use strict'
const request = require('request')
const htmlparser = require('htmlparser2')
const AWS = require('aws-sdk')
const s3 = new AWS.S3()
const uuid = require('uuid/v1')
module.exports = function () {
function parseImageUrls (html, url) {
return new Promise(resolve => {
let urls = []
const parser = new htmlparser.Parser({
onopentag: function (name, attribs) {
if (name === 'img' && attribs && attribs.src) {
if (/^data:image/i.test(attribs.src)) {
urls.push({url: attribs.src, id: attribs.id})
} else if (!/^(f|ht)tps?:///i.test(attribs.src)) {
if (attribs.src[0] === '/' || url[url.length - 1] === '/') {
urls.push({url: url + attribs.src, id: attribs.id})
} else {
urls.push({url: url + '/' + attribs.src, id: attribs.id})
}
} else {
urls.push({url: attribs.src, id: attribs.id})
}
}
},
onend: function () {
resolve(urls)
}
}, {decodeEntities: true})
parser.write(html)
parser.end()
})
}
function fetchImage (imageUrl, id, domain) {
return new Promise((resolve, reject) => {
console.log('fetching: ' + imageUrl)
request.head(imageUrl, (err, response, body) => {
console.log('fetching: ' + response)
if (err || response.statusCode !== 200) { return resolve({url: imageUrl, stat: response.statusCode, err: err}) }
request({url: imageUrl, encoding: null}, (err, response, buffer) => {
console.log('fetching: ' + response.statusCode)
if (err || response.statusCode !== 200) { return resolve({url: imageUrl, stat: response.statusCode, err: err}) }
const fileName = uuid()
s3.putObject({Bucket: process.env.BUCKET, Key: domain + '/' + fileName, Body: buffer}, (err, data) => {
console.log('writing: ' + imageUrl)
resolve({url: imageUrl, stat: err || 'ok'})
})
})
})
})
}
function decodeImage (imageUrl, id, domain) {
const spl = imageUrl.split(',')
const data = spl[1]
return new Promise((resolve, reject) => {
let type = /data:image/(.+);base64/i.exec(spl[0])
if (type) {
type = type[1]
const b = Buffer.from(data, 'base64')
const fileName = uuid()
s3.putObject({Bucket: process.env.BUCKET, Key: domain + '/' + fileName, Body: b}, (err, data) => {
resolve({url: imageUrl, stat: err || 'ok'})
})
} else {
resolve({url: imageUrl, stat: 'unknonwn type'})
}
})
}
function fetchImages (images, domain) {
return new Promise((resolve, reject) => {
let promises = []
images.forEach(image => {
if (/^data:image/i.test(image.url)) {
promises.push(decodeImage(image.url, image.id, domain))
} else {
promises.push(fetchImage(image.url, image.id, domain))
}
})
Promise.all(promises).then(values => { resolve(values) })
})
}
return {
fetchImages,
parseImageUrls
}
}
</code>
<code>'use strict'
const request = require('request')
const htmlparser = require('htmlparser2')
const AWS = require('aws-sdk')
const s3 = new AWS.S3()
const uuid = require('uuid/v1')
// NOTE(review): this listing is an exact duplicate of the images.js code
// pasted earlier in the post — consider removing one copy from the question.
// Factory returning {parseImageUrls, fetchImages} for the crawler.
module.exports = function () {
// Extracts <img> src URLs from `html`, resolving relative paths against
// `url`. Resolves with an array of {url, id}.
function parseImageUrls (html, url) {
return new Promise(resolve => {
let urls = []
const parser = new htmlparser.Parser({
onopentag: function (name, attribs) {
if (name === 'img' && attribs && attribs.src) {
if (/^data:image/i.test(attribs.src)) {
urls.push({url: attribs.src, id: attribs.id})
// NOTE(review): the slashes below are unescaped as pasted — in the real
// file this regex must read /^(f|ht)tps?:\/\//i or the file won't parse.
} else if (!/^(f|ht)tps?:///i.test(attribs.src)) {
if (attribs.src[0] === '/' || url[url.length - 1] === '/') {
urls.push({url: url + attribs.src, id: attribs.id})
} else {
urls.push({url: url + '/' + attribs.src, id: attribs.id})
}
} else {
urls.push({url: attribs.src, id: attribs.id})
}
}
},
onend: function () {
resolve(urls)
}
}, {decodeEntities: true})
parser.write(html)
parser.end()
})
}
// HEAD-checks then downloads one image into S3 under <domain>/<uuid>.
// NOTE(review): when `err` is set, `response` is undefined, so the
// `stat: response.statusCode` in the resolve below throws a TypeError.
function fetchImage (imageUrl, id, domain) {
return new Promise((resolve, reject) => {
console.log('fetching: ' + imageUrl)
request.head(imageUrl, (err, response, body) => {
console.log('fetching: ' + response)
if (err || response.statusCode !== 200) { return resolve({url: imageUrl, stat: response.statusCode, err: err}) }
request({url: imageUrl, encoding: null}, (err, response, buffer) => {
console.log('fetching: ' + response.statusCode)
if (err || response.statusCode !== 200) { return resolve({url: imageUrl, stat: response.statusCode, err: err}) }
const fileName = uuid()
s3.putObject({Bucket: process.env.BUCKET, Key: domain + '/' + fileName, Body: buffer}, (err, data) => {
console.log('writing: ' + imageUrl)
resolve({url: imageUrl, stat: err || 'ok'})
})
})
})
})
}
// Decodes a base64 data:image URI and stores the bytes in S3.
// NOTE(review): the regex below also has an unescaped slash as pasted;
// it must read /data:image\/(.+);base64/i in the real file.
function decodeImage (imageUrl, id, domain) {
const spl = imageUrl.split(',')
const data = spl[1]
return new Promise((resolve, reject) => {
let type = /data:image/(.+);base64/i.exec(spl[0])
if (type) {
type = type[1]
const b = Buffer.from(data, 'base64')
const fileName = uuid()
s3.putObject({Bucket: process.env.BUCKET, Key: domain + '/' + fileName, Body: b}, (err, data) => {
resolve({url: imageUrl, stat: err || 'ok'})
})
} else {
resolve({url: imageUrl, stat: 'unknonwn type'})
}
})
}
// Fetches/decodes every image in parallel; resolves with per-image results.
function fetchImages (images, domain) {
return new Promise((resolve, reject) => {
let promises = []
images.forEach(image => {
if (/^data:image/i.test(image.url)) {
promises.push(decodeImage(image.url, image.id, domain))
} else {
promises.push(fetchImage(image.url, image.id, domain))
}
})
Promise.all(promises).then(values => { resolve(values) })
})
}
return {
fetchImages,
parseImageUrls
}
}
</code>
'use strict'
const request = require('request')
const htmlparser = require('htmlparser2')
const AWS = require('aws-sdk')
const s3 = new AWS.S3()
const uuid = require('uuid/v1')
// NOTE(review): this listing is an exact duplicate of the images.js code
// pasted earlier in the post — consider removing one copy from the question.
// Factory returning {parseImageUrls, fetchImages} for the crawler.
module.exports = function () {
// Extracts <img> src URLs from `html`, resolving relative paths against
// `url`. Resolves with an array of {url, id}.
function parseImageUrls (html, url) {
return new Promise(resolve => {
let urls = []
const parser = new htmlparser.Parser({
onopentag: function (name, attribs) {
if (name === 'img' && attribs && attribs.src) {
if (/^data:image/i.test(attribs.src)) {
urls.push({url: attribs.src, id: attribs.id})
// NOTE(review): the slashes below are unescaped as pasted — in the real
// file this regex must read /^(f|ht)tps?:\/\//i or the file won't parse.
} else if (!/^(f|ht)tps?:///i.test(attribs.src)) {
if (attribs.src[0] === '/' || url[url.length - 1] === '/') {
urls.push({url: url + attribs.src, id: attribs.id})
} else {
urls.push({url: url + '/' + attribs.src, id: attribs.id})
}
} else {
urls.push({url: attribs.src, id: attribs.id})
}
}
},
onend: function () {
resolve(urls)
}
}, {decodeEntities: true})
parser.write(html)
parser.end()
})
}
// HEAD-checks then downloads one image into S3 under <domain>/<uuid>.
// NOTE(review): when `err` is set, `response` is undefined, so the
// `stat: response.statusCode` in the resolve below throws a TypeError.
function fetchImage (imageUrl, id, domain) {
return new Promise((resolve, reject) => {
console.log('fetching: ' + imageUrl)
request.head(imageUrl, (err, response, body) => {
console.log('fetching: ' + response)
if (err || response.statusCode !== 200) { return resolve({url: imageUrl, stat: response.statusCode, err: err}) }
request({url: imageUrl, encoding: null}, (err, response, buffer) => {
console.log('fetching: ' + response.statusCode)
if (err || response.statusCode !== 200) { return resolve({url: imageUrl, stat: response.statusCode, err: err}) }
const fileName = uuid()
s3.putObject({Bucket: process.env.BUCKET, Key: domain + '/' + fileName, Body: buffer}, (err, data) => {
console.log('writing: ' + imageUrl)
resolve({url: imageUrl, stat: err || 'ok'})
})
})
})
})
}
// Decodes a base64 data:image URI and stores the bytes in S3.
// NOTE(review): the regex below also has an unescaped slash as pasted;
// it must read /data:image\/(.+);base64/i in the real file.
function decodeImage (imageUrl, id, domain) {
const spl = imageUrl.split(',')
const data = spl[1]
return new Promise((resolve, reject) => {
let type = /data:image/(.+);base64/i.exec(spl[0])
if (type) {
type = type[1]
const b = Buffer.from(data, 'base64')
const fileName = uuid()
s3.putObject({Bucket: process.env.BUCKET, Key: domain + '/' + fileName, Body: b}, (err, data) => {
resolve({url: imageUrl, stat: err || 'ok'})
})
} else {
resolve({url: imageUrl, stat: 'unknonwn type'})
}
})
}
// Fetches/decodes every image in parallel; resolves with per-image results.
function fetchImages (images, domain) {
return new Promise((resolve, reject) => {
let promises = []
images.forEach(image => {
if (/^data:image/i.test(image.url)) {
promises.push(decodeImage(image.url, image.id, domain))
} else {
promises.push(fetchImage(image.url, image.id, domain))
}
})
Promise.all(promises).then(values => { resolve(values) })
})
}
return {
fetchImages,
parseImageUrls
}
}
When the function is invoked I get this error message:
<code>2024-06-02T15:23:47.691Z undefined ERROR Uncaught Exception
{
"errorType": "Runtime.ImportModuleError",
"errorMessage": "Error: Cannot find module 'request'\nRequire stack:\n- /var/task/handler.js\n- /var/runtime/index.mjs",
"stack": [
"Runtime.ImportModuleError: Error: Cannot find module 'request'",
"Require stack:",
"- /var/task/handler.js",
"- /var/runtime/index.mjs",
" at _loadUserApp (file:///var/runtime/index.mjs:1087:17)",
" at async UserFunction.js.module.exports.load (file:///var/runtime/index.mjs:1119:21)",
" at async start (file:///var/runtime/index.mjs:1282:23)",
" at async file:///var/runtime/index.mjs:1288:1"
]
}
</code>
<code>2024-06-02T15:23:47.691Z undefined ERROR Uncaught Exception
{
"errorType": "Runtime.ImportModuleError",
"errorMessage": "Error: Cannot find module 'request'\nRequire stack:\n- /var/task/handler.js\n- /var/runtime/index.mjs",
"stack": [
"Runtime.ImportModuleError: Error: Cannot find module 'request'",
"Require stack:",
"- /var/task/handler.js",
"- /var/runtime/index.mjs",
" at _loadUserApp (file:///var/runtime/index.mjs:1087:17)",
" at async UserFunction.js.module.exports.load (file:///var/runtime/index.mjs:1119:21)",
" at async start (file:///var/runtime/index.mjs:1282:23)",
" at async file:///var/runtime/index.mjs:1288:1"
]
}
</code>
2024-06-02T15:23:47.691Z undefined ERROR Uncaught Exception
{
"errorType": "Runtime.ImportModuleError",
"errorMessage": "Error: Cannot find module 'request'\nRequire stack:\n- /var/task/handler.js\n- /var/runtime/index.mjs",
"stack": [
"Runtime.ImportModuleError: Error: Cannot find module 'request'",
"Require stack:",
"- /var/task/handler.js",
"- /var/runtime/index.mjs",
" at _loadUserApp (file:///var/runtime/index.mjs:1087:17)",
" at async UserFunction.js.module.exports.load (file:///var/runtime/index.mjs:1119:21)",
" at async start (file:///var/runtime/index.mjs:1282:23)",
" at async file:///var/runtime/index.mjs:1288:1"
]
}