[SOLVED] How to write separate functions in separate .py files and execute them from main.py without using classes

Issue

I am new to Python and have not yet learned OOP and classes. I thought I understood functions, but I am running into a problem when calling functions from a different .py file.
The code below shows all my functions as they were defined in main.py.
I want to split main.py into two additional files, data_extraction.py and data_processing.py.
I understand that this can be done using classes, but can it be done without classes as well?
I divided the code into two other files, but I am getting an error (please see my attached screenshot).
Please explain what I can do here!

main.py

import pandas as pd
import requests
from bs4 import BeautifulSoup
from configparser import ConfigParser
import logging
import data_extraction


# --- Module-level setup: everything below runs as soon as main.py is executed
# or imported. ---

# Load settings from config.ini (located relative to the current working directory).
config = ConfigParser()
config.read('config.ini')
# Send log records of level DEBUG and above to logfile.log.
logging.basicConfig(filename='logfile.log', level=logging.DEBUG,
                    format='%(asctime)s:%(lineno)d:%(name)s:%(levelname)s:%(message)s')

# Both URLs come from the [configData] section of config.ini.
# `baseurl` is the page fetched below; `sub_url` is the prefix that
# get_links_in_list() prepends to every href it finds.
baseurl = config['configData']['baseurl']
sub_url = config['configData']['sub_url']

# HTTP request headers carrying a desktop Chrome User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36',
    "Upgrade-Insecure-Requests": "1", "DNT": "1",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate"

}
# Download the first (listing) page and parse it.
r = requests.get(baseurl, headers=headers)
# NOTE: `status` is stored but never read again in the code shown here.
status = r.status_code
soup = BeautifulSoup(r.content, 'html.parser')
# Filled in place by get_links_in_list().
model_links = []



# Field labels looked up on every model page; they are also used as the
# column names of the final CSV.
all_keys = [
    'Model',
    'Platform',
    'Product Family',
    'Product Line',
    '# of CPU Cores',
    '# of Threads',
    'Max. Boost Clock',
    'Base Clock',
    'Total L2 Cache',
    'Total L3 Cache',
    'Default TDP',
    'Processor Technology for CPU Cores',
    'Unlocked for Overclocking',
    'CPU Socket',
    'Thermal Solution (PIB)',
    'Max. Operating Temperature (Tjmax)',
    'Launch Date',
    '*OS Support',
]


# function to get the model links in one list from soup object(1st page extraction)
def get_links_in_list():
    """Collect the URL of every model listed on the first page.

    Walks each ``td`` cell of the module-level ``soup`` whose ``headers``
    attribute is ``view-name-table-column``, prefixes the ``href`` of the
    cell's anchor with the module-level ``sub_url``, keeps only the text
    before the first space, and appends the result to the module-level
    ``model_links`` list.

    Returns:
        The module-level ``model_links`` list, after the appends.
    """
    cells = soup.find_all('td', headers='view-name-table-column')
    for cell in cells:
        href = cell.a.get('href')
        absolute_url = sub_url + href
        # Anything after the first space is dropped from the link.
        model_links.append(absolute_url.split(" ")[0])
    return model_links


# Step 1: collect the model-page URLs from the listing page and log them.
model_links = get_links_in_list()
logging.debug(model_links)

# Step 2: scrape every model page.
# NOTE(review): main.py does `import data_extraction`, so the name below is the
# *module*, not the function inside it; calling a module raises
# "TypeError: 'module' object is not callable". The function of the same name
# is also declared as data_extraction(model_links), so it needs that argument.
each_link_data = data_extraction()
print(each_link_data)
# Steps 3 and 4 (flattening the records and writing the CSV) are disabled here.
#all_link_data = data_processing()
#write_to_csv(all_keys)


data_extraction.py

import requests
from bs4 import BeautifulSoup
from main import baseurl
from main import all_keys

# HTTP request headers carrying a desktop Chrome User-Agent string; used by
# data_extraction() below for every model-page request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36',
    "Upgrade-Insecure-Requests": "1", "DNT": "1",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip, deflate"

}
# NOTE(review): this downloads `baseurl` at import time. data_extraction()
# below rebinds `r` and `soup` locally and takes `model_links` as a parameter,
# so the four module-level names assigned here are not read by it.
r = requests.get(baseurl, headers=headers)
status = r.status_code
soup = BeautifulSoup(r.content, 'html.parser')
model_links = []
# function to get data for each link from the website(2nd page extraction)
def data_extraction(model_links):
    """Scrape the specification fields of every model page.

    Each URL in ``model_links`` is downloaded with the module-level
    ``headers`` and parsed. For every label in the module-level ``all_keys``
    the text of the ``.field__item`` that follows the matching
    ``.field__label`` element is stored.

    Args:
        model_links: Iterable of model-page URLs.

    Returns:
        A list with exactly one dict per link that was scraped successfully.
        Each dict maps every entry of ``all_keys`` to its value (``''`` when
        the page has no such field) and has an extra ``'link'`` key holding
        the page URL. ``'*OS Support'`` is a list of strings when present.
        When no labelled ``'Model'`` field exists, ``'Model'`` is a list with
        at most one item taken from the ``.page-title`` element, or ``''`` if
        the page has no such element.

    A link whose download or parsing fails is reported on stdout and skipped;
    the remaining links are still processed.
    """
    each_link_data = []
    for link in model_links:
        try:
            r = requests.get(link, headers=headers)
            soup = BeautifulSoup(r.content, 'html.parser')
            specification = {}
            for key in all_keys:
                # The value is either the sibling .field__item of the label,
                # or the first .field__item inside a sibling .field__items.
                spec = soup.select_one(
                    f'.field__label:-soup-contains("{key}") + .field__item, '
                    f'.field__label:-soup-contains("{key}") + .field__items .field__item')

                if spec is not None:
                    if key == '*OS Support':
                        # Multi-valued field: collect every item of the group.
                        specification[key] = [i.text for i in spec.parent.select('.field__item')]
                    else:
                        specification[key] = spec.text
                    continue

                specification[key] = ''
                if key == 'Model':
                    # Fall back to the page title when there is no labelled
                    # Model field; keep only the first child's text.
                    title = soup.select_one('.page-title')
                    if title is not None:
                        specification[key] = [i.text for i in title][:1]
        except Exception as exc:
            # Best effort: report the failing link and carry on with the rest.
            print(f'Error occurred for {link}: {exc}')
            continue

        # Record the result once per link, after all keys have been read.
        specification['link'] = link
        each_link_data.append(specification)

    return each_link_data

data_processing.py


# function for data processing : converting the each link object into dataframe

def data_processing():
    """Flatten every scraped record so that no value is a list.

    Reads the module-level ``each_link_data``. Any value that is a list is
    collapsed into a single comma-joined string; every other value is copied
    unchanged.

    Returns:
        A new list of flattened dicts, in the same order as
        ``each_link_data``.
    """
    def _flatten(record):
        # Build a copy of one record with list values joined by ','.
        return {
            field: ','.join(record[field]) if isinstance(record[field], list) else record[field]
            for field in record
        }

    return [_flatten(record) for record in each_link_data]


# Runs at import time: builds the module-level `all_link_data` that
# write_to_csv() reads.
all_link_data = data_processing()


# function to write dataframe data into csv
def write_to_csv(all_keys):
    """Write the flattened records to ``final_data.csv``.

    Builds a DataFrame from the module-level ``all_link_data``, removes
    duplicate rows, renumbers the index, and keeps only the columns named in
    ``all_keys`` followed by ``'link'``.

    Args:
        all_keys: List of column labels to export; it is not modified.
    """
    deduplicated = (
        pd.DataFrame.from_dict(all_link_data)
        .drop_duplicates()
        .reset_index()
    )
    # Selecting explicit columns also discards the 'index' column that
    # reset_index() added.
    export_columns = all_keys + ['link']
    deduplicated[export_columns].to_csv('final_data.csv')


# Runs at import time: writes final_data.csv using the labels in `all_keys`.
write_to_csv(all_keys)

Solution

Move the existing functions (e.g. write_to_csv) to a different file, for example ‘utility_functions.py’. Import the function in main.py using from utility_functions import write_to_csv. Now you can use the function ‘write_to_csv’ in main.py as

write_to_csv(all_keys)

Edit
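In the main.py file,
use from data_extraction import data_extraction instead of import data_extraction (with a plain import data_extraction, the name data_extraction refers to the module rather than the function, so calling it fails).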

In data_extraction.py file

Remove lines
from main import baseurl
from main import all_keys

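Once those imports are removed, data_extraction.py will raise an undefined-variable error (NameError) for baseurl and all_keys; you can fix it by passing those variables as arguments in the function call.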

Answered By – Manish Shetty

Answer Checked By – Marilyn (BugsFixing Volunteer)

Leave a Reply

Your email address will not be published.