"""Interactive tool that creates or extends a company database stored as CSV.

The user picks 'new' (create ``<name>.csv`` from a data source), 'load'
(append or merge a data source into an existing database) or 'exit'.  The
kind of data source is recognised from its file name.  The database and all
data sources are read and written with ',' as delimiter and '|' as quote
character.
"""
import csv
import os

# Author: Md Md Farid

# Column layout of the database.  Every structured row has exactly this many
# cells so that later merges can address columns by position.
FIELDNAMES = ["handle", "name", "website", "phone", "industry", "size", "type", "founded",
              "address", "city", "state", "post_code", "country_code", "rating", "hours"]

_CSV_FORMAT = {"delimiter": ",", "quotechar": "|"}

_MENU_PROMPT = ("Type 'new' to create a new database source or type 'load' to load an existing source. Or "
                "type 'exit' to close the program. ")
_SOURCE_PROMPT = "Enter data source filename to append company data: "


def _cell(row, index):
    """Return ``row[index]``, or '' when the row has no such column."""
    return row[index] if index < len(row) else ''


def _from_linkedin(row):
    """Structure one row of company data from Big Picture ('linkedin' files)."""
    return [_cell(row, 0), _cell(row, 1), _cell(row, 2), '', _cell(row, 3), _cell(row, 4),
            _cell(row, 5), _cell(row, 6), '', _cell(row, 7), _cell(row, 8), '', _cell(row, 9),
            '', '']


def _from_companies_house(row):
    """Structure one row of British company data from Companies House.

    The country code is hardcoded because this source holds British
    companies only.
    """
    name = _cell(row, 0)
    # The two address cells are joined with a space; an empty cell is skipped
    # so no stray separator is produced.
    address = ' '.join(part for part in (_cell(row, 4), _cell(row, 5)) if part)
    return ['company/' + name.lower(), name, _cell(row, 32), '', _cell(row, 26), '',
            _cell(row, 10), _cell(row, 14), address, _cell(row, 6), _cell(row, 8),
            _cell(row, 9), 'GB', '', '']


def _from_yellow_pages(row):
    """Structure one row of Yellowpages US data extracted with Apify."""
    name = _cell(row, 9)
    # 'US' is placed in the country_code column.
    # NOTE(review): source column 10 is assumed to be the rating - confirm
    # against the Apify export.
    return ['company/' + name.lower(), name, _cell(row, 15), _cell(row, 1), '', '', '', '',
            '', '', '', '', 'US', _cell(row, 10), '']


def _from_unknown(row):
    """Attempt a general structure for company data from unknown csv sources."""
    name = _cell(row, 0)
    # NOTE(review): the source columns used here are the same positions the
    # Companies House layout reads, so they are mapped to the same database
    # columns - confirm this is the intended fallback.  'UK' is kept as the
    # country code written for these rows.
    return ['company/' + name.lower(), name, _cell(row, 32), '', _cell(row, 26), '',
            _cell(row, 10), _cell(row, 14), '', _cell(row, 6), _cell(row, 8), '', 'UK',
            '', '']


def _source_kind(path):
    """Return the lower-cased file name used to recognise the data source.

    Only the file name is inspected so a directory name in the path cannot
    select the wrong layout.
    """
    return os.path.basename(path).lower()


def _read_rows(path, skip_header=True):
    """Read a csv file into a list of rows, dropping blank lines.

    When ``skip_header`` is true the first row is discarded; an empty file
    simply yields no rows.
    """
    with open(path, "r", newline="") as handle:
        reader = csv.reader(handle, **_CSV_FORMAT)
        if skip_header:
            next(reader, None)
        return [row for row in reader if row]


def _append_rows(path, rows):
    """Append structured rows to the end of the database file."""
    with open(path, "a", newline="") as handle:
        csv.writer(handle, **_CSV_FORMAT).writerows(rows)


def _merge_listing_details(database_path, listings):
    """Fill phone, rating and hours of database rows from Yell UK / Yelp rows.

    A listing applies to every database row whose name contains the listing
    name (case-insensitive).  Listing columns are read as name, rating,
    phone, hours.  The database is read once and rewritten once.
    """
    if not listings:
        # Nothing to merge; leave the database file untouched.
        return
    records = _read_rows(database_path, skip_header=False)
    width = len(FIELDNAMES)
    for record in records:
        if record == FIELDNAMES:
            continue  # never treat the header line as a company
        record_name = _cell(record, 1).lower()
        for listing in listings:
            listing_name = _cell(listing, 0).lower()
            # An empty listing name would be a substring of every name.
            if not listing_name or listing_name not in record_name:
                continue
            # Pad short rows so the positional assignments below are valid.
            record.extend([''] * (width - len(record)))
            record[3] = _cell(listing, 2)
            record[13] = _cell(listing, 1)
            record[14] = _cell(listing, 3)
    with open(database_path, "w", newline="") as handle:
        csv.writer(handle, **_CSV_FORMAT).writerows(records)


def _prompt_existing_file(ask, prompt):
    """Keep asking with ``prompt`` until the reply names an existing file."""
    while True:
        reply = ask(prompt)
        if os.path.isfile(reply):
            return reply
        print("File does not exist. Type an existing file")


def _load_into_existing(ask):
    """Handle 'load': append or merge a data source into an existing database.

    The existing database file must be given by its full name, including
    '.csv'.
    """
    database_path = _prompt_existing_file(ask, "Type name of existing csv file: ")
    source_path = _prompt_existing_file(ask, _SOURCE_PROMPT)
    kind = _source_kind(source_path)
    rows = _read_rows(source_path)

    if 'linkedin' in kind:
        layout = _from_linkedin
    elif 'basiccompanydata' in kind:
        layout = _from_companies_house
    elif 'yellow-pages' in kind:
        # Tested before 'yell': every 'yellow-pages' name also contains 'yell'.
        layout = _from_yellow_pages
    elif 'yell' in kind or 'yelp' in kind:
        _merge_listing_details(database_path, rows)
        return
    else:
        layout = _from_unknown
    _append_rows(database_path, [layout(row) for row in rows])


def _create_database(ask):
    """Handle 'new': create ``<name>.csv`` with a header and structured rows.

    New files are defaulted in 'csv' format for commonality.  An existing
    file is never overwritten.
    """
    new_name = ask("Type new name of file. No need to type extension as csv format is defaulted. ")
    database_path = new_name + '.csv'
    source_path = _prompt_existing_file(ask, _SOURCE_PROMPT)
    kind = _source_kind(source_path)
    rows = _read_rows(source_path)

    if 'linkedin' in kind:
        structured = [_from_linkedin(row) for row in rows]
    elif 'basiccompanydata' in kind:
        structured = [_from_companies_house(row) for row in rows]
    else:
        # No data sources from Yell and Yelp can be used to supplement a new
        # database; any other source leaves the new file with its header only.
        structured = []

    try:
        # Mode 'x' creates the file and fails if it already exists.
        handle = open(database_path, "x", newline="")
    except FileExistsError:
        print("File " + database_path + " already exists. Type a different name or use 'load'.")
        return
    with handle:
        writer = csv.writer(handle, **_CSV_FORMAT)
        writer.writerow(FIELDNAMES)
        writer.writerows(structured)
    print("Your new database filename is " + database_path)


def main(ask=input):
    """Run the menu loop until the user types 'exit'.

    ``ask`` is called with a prompt string and must return the user's reply;
    it defaults to the built-in ``input``.
    """
    while True:
        choice = ask(_MENU_PROMPT).strip().lower()
        if choice == 'load':
            _load_into_existing(ask)
        elif choice == 'new':
            _create_database(ask)
        elif choice == 'exit':
            # Close program.
            break
        else:
            print("Invalid choice typed. Enter 'new' to create a new database source or type 'load' "
                  "to load an existing source. Or enter 'exit' to close the program. ")


if __name__ == "__main__":
    main()