Need help with Python scripting. I have written the classes and defined the objects, and I need a main method to execute the Python script. I am validating a file with the script to check for unwanted values in the file, and to check whether the file name belongs to the preferred vendor. Please help with this, since I am working on it as a priority.
Need help with Python scripting. I have written the classes and defined the objects, and I need a main method to execute the Python script.
1) I am validating a file with the Python script to check for unwanted values in the file.
2) I also need to check whether the file name belongs to the preferred vendor.
Please help with this, since I am working on it as a priority.
import argparse
import logging
import os
import re
from collections import defaultdict

import boto3
# Parse the command-line arguments.
# NOTE: this runs at import time, so importing this module from other code
# also parses sys.argv and exits on invalid arguments.
parser = argparse.ArgumentParser(description="Amazon S3 File Operations")
# Location to operate on: passed to os.listdir, used as the path prefix of the
# file to validate, or used as the S3 bucket name, depending on the function.
parser.add_argument('sourcePath')
# File name (validateVendorPrograms) or regular expression matched against
# S3 keys (get_list_of_files); getAllFileNames does not use it.
parser.add_argument('src_file_pattern')
# Name of the module-level function to run; restricted to the three below.
parser.add_argument('function_name', choices=['getAllFileNames','validateVendorPrograms','get_list_of_files'])
args = parser.parse_args()
# Shared S3 client used by get_list_of_files.
s3_client = boto3.client('s3')
##### validators
class ValidationException(Exception):
    """Raised by a validator when a field value fails its check."""


class Validator(object):
    """Base class for field validators.

    Subclasses implement ``validate`` (raise ``ValidationException`` when a
    value is rejected) and the ``bad`` property (whatever record of rejected
    values the subclass keeps).
    """

    def __init__(self, empty_ok=False):
        """Initialise the state shared by all validators.

        Args:
            empty_ok: Flag stored on the instance; subclasses decide how to
                honour it (RegexValidator uses it to let empty fields pass).
        """
        # Starts at zero; nothing in this file increments it.
        self.fail_count = 0
        self.empty_ok = empty_ok

    @property
    def bad(self):
        """Return the rejected values recorded so far (subclass hook)."""
        raise NotImplementedError

    def validate(self, field, row):
        """Check ``field`` (a cell value) in the context of ``row`` (subclass hook)."""
        raise NotImplementedError
##### LocalFile
class VladInput(object):
    """Abstract input source.

    Every method raises ``NotImplementedError``; concrete sources override
    ``__init__``, ``open`` and ``__repr__``.
    """

    def __init__(self):
        raise NotImplementedError

    def open(self):
        """Return the content of the source (subclass hook)."""
        raise NotImplementedError

    def __repr__(self):
        raise NotImplementedError
class LocalFile(VladInput):
    """Input source backed by a file on the local filesystem."""

    def __init__(self, filename):
        # The base-class constructor always raises, so it is deliberately
        # not called here.
        self.filename = filename

    def open(self):
        """Read the whole file and return its lines as a list of strings.

        The file is decoded as ISO-8859-1 and closed before returning.
        """
        # Inside the method body ``open`` resolves to the builtin, not to
        # this method.
        with open(self.filename, "r", encoding="ISO-8859-1") as f:
            return f.readlines()

    def __repr__(self):
        return "{}('{}')".format(self.__class__.__name__, self.filename)
class EmptyValidator(Validator):
    """Validator that accepts only the empty string."""

    def __init__(self, **kwargs):
        super(EmptyValidator, self).__init__(**kwargs)
        # Distinct non-empty values seen so far.
        self.nonempty = set()

    def validate(self, field, row=None):
        """Reject any value other than ``""``.

        Args:
            field: The cell value to check.
            row: Unused; accepted for a uniform validator signature.

        Raises:
            ValidationException: If ``field`` is not the empty string.
        """
        if field != "":
            self.nonempty.add(field)
            raise ValidationException("'{}' is not an empty string".format(field))

    @property
    def bad(self):
        """Return the set of non-empty values that were rejected."""
        return self.nonempty
class NotEmptyValidator(Validator):
    """Validator that rejects the empty string."""

    def __init__(self, **kwargs):
        # The base constructor already sets fail_count to 0.
        super(NotEmptyValidator, self).__init__(**kwargs)
        # Becomes True once any empty value has been seen.
        self.failed = False

    def validate(self, field, row=None):
        """Reject the empty string.

        Args:
            field: The cell value to check.
            row: Unused; accepted for a uniform validator signature.

        Raises:
            ValidationException: If ``field`` is ``""``.
        """
        if field == "":
            self.failed = True
            raise ValidationException("Row has empty field in column")

    @property
    def bad(self):
        """Return True if at least one empty value was rejected."""
        return self.failed
class RegexValidator(Validator):
    """Validator that requires values to match a regular expression."""

    def __init__(self, pattern=r"di^", full=False, **kwargs):
        """Compile the pattern used by ``validate``.

        Args:
            pattern: Regular expression matched from the start of the value
                (``re.match``). The default can never match, because it
                requires start-of-string after two characters.
            full: When true, the pattern must match the entire value.
            **kwargs: Forwarded to ``Validator`` (e.g. ``empty_ok``).
        """
        super(RegexValidator, self).__init__(**kwargs)
        # Distinct values that failed to match.
        self.failures = set()
        if full:
            # \Z anchors at the very end of the string. A bare "Z" here would
            # instead demand a literal trailing letter Z.
            self.regex = re.compile(r"(?:" + pattern + r")\Z")
        else:
            self.regex = re.compile(pattern)

    def validate(self, field, row=None):
        """Reject values that do not match the pattern.

        An empty value is let through when ``empty_ok`` is set, even if it
        does not match.

        Args:
            field: The cell value to check.
            row: Unused; accepted for a uniform validator signature.

        Raises:
            ValidationException: If ``field`` does not match.
        """
        if not self.regex.match(field) and (field or not self.empty_ok):
            self.failures.add(field)
            # Report the pattern text rather than the repr of the compiled
            # regex object.
            raise ValidationException(
                "'{}' does not match pattern /{}/".format(field, self.regex.pattern)
            )

    @property
    def bad(self):
        """Return the set of values that failed to match."""
        return self.failures
class Vlad(object):
    """Holds an input source together with the validators for its fields.

    NOTE(review): validateVendorPrograms in this file calls ``.validate()``
    on an instance, but no such method is defined in this class as shown;
    it still has to be supplied.
    """

    def __init__(
        self,
        source,
        validators=None,
        default_validator=EmptyValidator,
        delimiter=None,
        ignore_missing_validators=False,
        quiet=False,
    ):
        """Store the configuration and prepare empty result containers.

        Args:
            source: Input object (for example a LocalFile).
            validators: Mapping of field name to a list of validators. When
                empty or omitted, a ``validators`` attribute defined on the
                class is used instead, if present.
            default_validator: Zero-argument callable; a fresh instance
                replaces every empty validator list.
            delimiter: Field delimiter; falls back to a ``delimiter``
                attribute defined on the class, then to ",".
            ignore_missing_validators: Stored as-is on the instance.
            quiet: When true, the logger is disabled.
        """
        # The original logger assignment was commented out, which made the
        # ``self.logger.disabled`` line below fail. Use a class-provided
        # logger if there is one, otherwise a module logger.
        self.logger = getattr(self, "logger", None) or logging.getLogger(__name__)
        self.failures = defaultdict(lambda: defaultdict(list))
        self.missing_validators = None
        self.missing_fields = None
        self.source = source
        # Shallow-copy so that filling in defaults below does not mutate the
        # caller's dict or a class-level ``validators`` shared by instances.
        self.validators = dict(validators or getattr(self, "validators", {}))
        self.delimiter = delimiter or getattr(self, "delimiter", ",")
        self.line_count = 0
        self.ignore_missing_validators = ignore_missing_validators
        self.logger.disabled = quiet
        # User-facing explanation per validator class name.
        self.logMessagesForUser = {
            'RegexValidator': 'This field has carriage returns, wrong format, illegal characters, or is too long',
            'NotEmptyValidator': 'This field cannot be empty',
            'SetValidator': 'This field has does not have one of the accepted values'
        }
        self.loggedErrorRows = []
        # Fields declared with no validators get one default validator each.
        self.validators.update(
            {
                field: [default_validator()]
                for field, value in self.validators.items()
                if not value
            }
        )
def getAllFileNames(sourcePath, src_file_pattern):
    """Return the names of all entries in the directory ``sourcePath``.

    Args:
        sourcePath: Directory to list.
        src_file_pattern: Not used; no filtering is applied. Presumably kept
            so all three operations share one signature (verify with caller).

    Returns:
        The list produced by ``os.listdir`` (entry names, arbitrary order).
    """
    return os.listdir(sourcePath)
def validateVendorPrograms(sourcePath, src_file_pattern):
    """Run the vendor-program column rules against one local file.

    Args:
        sourcePath: Directory containing the file.
        src_file_pattern: Name of the file inside ``sourcePath``.

    Returns:
        Whatever ``Vlad(...).validate()`` returns.
        NOTE(review): ``Vlad`` as shown in this file defines no ``validate``
        method, so the result type cannot be confirmed from here.
    """
    # Years 1900-2099, months 01-12, days 01-31, separated by '-', ' ', '/'
    # or '.' (so "yyyy-mm-dd" and also e.g. "yyyy/mm/dd").
    date_pattern = r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$"

    def text(max_len, required=True):
        """Rules for a free-text column of at most ``max_len`` characters."""
        rules = [NotEmptyValidator()] if required else []
        # [.\s\S] matches any character, so this is purely a length limit.
        rules.append(RegexValidator(r"^[.\s\S]{0,%d}$" % max_len))
        return rules

    def date():
        """Rules for a mandatory date column."""
        return [NotEmptyValidator(), RegexValidator(date_pattern)]

    column_rules = {
        'Vendor Type': text(50),
        'Vendor Name': text(100),
        'BCBSM Vendor': text(1),
        'Program Description': text(2048),
        'Eligibility Criteria': text(2048),
        'Benefit Summary': text(2048),
        'Eligibility Coverage': text(200),
        'Program Start': date(),
        'Program End': date(),
        'URL': text(2048),
        'App Name': text(100),
        # A SetValidator(['Y', 'N', '']) was planned for this column, but no
        # SetValidator is defined in this file; only the length is checked.
        'Inclusion': text(1, required=False),
        'Group Number': text(10, required=False),
        'Section Code': text(500, required=False),
        'Package Code': text(500, required=False),
        'Class Plan ID': text(500, required=False),
        'Referral Criteria': text(500, required=False),
        'Referral Process': text(500),
        'Vendor Contact': text(500),
        'Escalation Contact': text(200),
        'Escalation Phone': text(20),
        'Escalation Email': text(200),
    }

    # os.path.join inserts the separator when sourcePath lacks a trailing
    # one; a sourcePath that already ends with a separator gives the same
    # path as plain concatenation did.
    source = LocalFile(os.path.join(sourcePath, src_file_pattern))
    errorRows = Vlad(source=source, validators=column_rules, quiet=False).validate()
    return errorRows
def get_list_of_files(sourcePath, src_file_pattern):
    """List the S3 objects in a bucket whose key matches a pattern.

    Args:
        sourcePath: Name of the S3 bucket.
        src_file_pattern: Regular expression searched for anywhere in each
            object key (``re.search``).

    Returns:
        Base names (text after the last "/") of the matching keys.

    Raises:
        Exception: If no key matches.
    """
    pattern = re.compile(src_file_pattern)
    # A single list_objects_v2 call returns one page only (at most 1000
    # keys); the paginator walks every page of the bucket.
    paginator = s3_client.get_paginator('list_objects_v2')
    list_of_files = []
    for page in paginator.paginate(Bucket=sourcePath):
        # 'Contents' is absent when a page holds no objects.
        for obj in page.get('Contents', []):
            if pattern.search(obj['Key']):
                list_of_files.append(os.path.basename(obj['Key']))
    if not list_of_files:
        raise Exception("No Files Found.")
    return list_of_files
def main():
    """Run the operation chosen on the command line and print its result.

    Reads the module-level ``args`` namespace: ``function_name`` selects the
    function, which is called with ``sourcePath`` and ``src_file_pattern``.
    """
    # argparse restricts function_name to exactly these keys, so the lookup
    # cannot fail for arguments that passed parsing.
    operations = {
        'getAllFileNames': getAllFileNames,
        'validateVendorPrograms': validateVendorPrograms,
        'get_list_of_files': get_list_of_files,
    }
    operation = operations[args.function_name]
    result = operation(args.sourcePath, args.src_file_pattern)
    print(result)


if __name__ == "__main__":
    main()