Wednesday, September 4, 2024

PYTHON - PANDAS - NumPy programming

 Working with Directories and File Paths:


The os and os.path modules provide functions for working with directories and file paths.

Sample program to list files in a directory and get file paths:

import os

# Show every entry found in the target directory
files = os.listdir("/path/to/directory")
for entry in files:
    print(entry)

# Resolve a relative name to its absolute path
file_path = os.path.abspath("sample.txt")
print(file_path)

# Report whether the path is a file or a directory.
# isfile() and isdir() are both False for a path that does not exist,
# so nothing is printed in that case.
if os.path.isfile(file_path):
    print("It's a file.")
elif os.path.isdir(file_path):
    print("It's a directory.")

These advanced file handling functions and techniques can help you work with different file formats, manipulate directories, and efficiently handle various file-related tasks in Python

Arithmetic Operations on Array Objects
# Matrix addition, subtraction and (true) matrix multiplication with NumPy.
import numpy as np  # imported here so the snippet runs on its own

# Create two 3x3 matrices
matrix1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
matrix2 = np.array([[9, 8, 7], [6, 5, 4], [3, 2, 1]])

# Matrix Addition (element-wise)
addition_result = np.add(matrix1, matrix2)

# Matrix Subtraction (element-wise)
subtraction_result = np.subtract(matrix1, matrix2)

# Matrix Multiplication (row-by-column product). For 2-D arrays the
# @ operator is the preferred spelling of np.dot / np.matmul.
multiplication_result = matrix1 @ matrix2

# Display the results
print("Matrix 1:")
print(matrix1)

print("\nMatrix 2:")
print(matrix2)

print("\nMatrix Addition:")
print(addition_result)

print("\nMatrix Subtraction:")
print(subtraction_result)

print("\nMatrix Multiplication:")
print(multiplication_result)

# Two 2x2 matrices used for the element-wise examples
matrix1 = np.array([[1, 2], [3, 4]])
matrix2 = np.array([[5, 6], [7, 8]])

# Every operation below works element by element
addition_result = np.add(matrix1, matrix2)
subtraction_result = np.subtract(matrix1, matrix2)
elementwise_multiplication_result = np.multiply(matrix1, matrix2)
elementwise_division_result = np.divide(matrix1, matrix2)

# Display the inputs first, then each result under its heading
print("Matrix1:")
print(matrix1)

for heading, value in (
    ("\nMatrix2:", matrix2),
    ("\nMatrix Addition:", addition_result),
    ("\nMatrix Subtraction:", subtraction_result),
    ("\nElement-wise Matrix Multiplication:", elementwise_multiplication_result),
    ("\nElement-wise Matrix Division:", elementwise_division_result),
):
    print(heading)
    print(value)

# Nine consecutive integers, also viewed as a 3x3 matrix
arr2 = np.arange(10, 19)
reshaped_array = np.reshape(arr2, (3, 3))
print("Martrix\n", reshaped_array)

# Index the flat array with a (4, 1) array of positions; the result takes
# the shape of the index array
positions = np.array([[0], [2], [6], [8]])
arrindex_value = arr2[positions]
print("Fetched Matrix Value using indexing\n", arrindex_value)

 

# 27 consecutive integers arranged as three 3x3 matrices
arr2 = np.arange(0, 27)
reshaped_array = np.reshape(arr2, (3, 3, 3))

print("3 Matices\n", reshaped_array)

# One index per axis selects a single element; fewer indices select a
# whole row or a whole matrix
middle_matrix = reshaped_array[1]
print("\nFetched single value", middle_matrix[1][1])
print("Access row of single value ", middle_matrix[1])
print("Matrix acess of single value\n", middle_matrix)

# Descriptive Analysis
import numpy as np
import scipy.stats as stats

# Given students weight
weight = [65, 70, 83, 88, 90, 90, 71, 85, 79, 95]

# Mean
mean = np.mean(weight)
print(f"Mean: {mean}")

# Median
median = np.median(weight)
print(f"Median: {median}")

# Mode
# NumPy has no mode function; scipy.stats.mode provides it. Depending on the
# SciPy version the result fields are scalars or 1-element arrays, so
# np.atleast_1d makes the [0] lookup work in both cases.
mode_var = stats.mode(weight)
mode_value = np.atleast_1d(mode_var.mode)[0]
mode_count = np.atleast_1d(mode_var.count)[0]
print(f"Mode: {mode_value} (appears {mode_count} times)")

# Standard Deviation (sample, ddof=1), rounded to the nearest integer
std_dev = round(np.std(weight, ddof=1))
print(f"Standard Deviation: {std_dev}")

# Variance (sample, ddof=1), rounded to the nearest integer
variance = round(np.var(weight, ddof=1))
print(f"Variance: {variance}")

# Range
data_range = np.max(weight) - np.min(weight)
print(f"Range: {data_range}")

# Interquartile Range (IQR)
q1 = np.percentile(weight, 25)
print(f"First Quartile (Q1): {q1}")
q3 = np.percentile(weight, 75)
print(f"Third Quartile (Q3): {q3}")
iqr = q3 - q1
print(f"Interquartile Range (IQR): {iqr}")

# Percentiles
percentiles = np.percentile(weight, [25, 50, 75])
print(f"25th Percentile: {percentiles[0]}")
print(f"50th Percentile: {percentiles[1]}")
print(f"75th Percentile: {percentiles[2]}")

In NumPy functions like np.std() and np.var(), the ddof parameter stands for "Delta Degrees of Freedom".

Calculation: When calculating the variance or standard deviation, the denominator used is N - ddof, where N is the number of elements in the array.

Default Value: The default value for ddof in NumPy is 0.
Unbiased Estimator: Setting ddof=1 gives an unbiased estimator of the population variance (and a less biased estimate of the population standard deviation), assuming the sample is drawn from a larger population.

Why is ddof=1 important?
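When calculating the variance or standard deviation of a sample, using ddof=0 (the default) can lead to a biased estimate of the population variance. This is because the sample variance tends to underestimate the true population variance: deviations are measured from the sample mean rather than from the (unknown) population mean. Dividing by N - 1 instead of N (ddof=1, known as Bessel's correction) compensates for this.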

import numpy as np

# Create a sample dataset
data = np.array([2, 3, 5, 7, 10, 12, 15])

# Calculate the mean (average) of the dataset
mean = np.mean(data)

# Calculate the standard deviation of the dataset
std_dev = np.std(data)

# Calculate the median of the dataset. It is stored under its own name so
# that it does not overwrite the standard deviation used for the Z-scores.
median = np.median(data)

# Calculate the Z-scores for each data point: (value - mean) / std deviation
z_scores = (data - mean) / std_dev

# Display the Z-scores
print("Data:")
print(data)

print("\nZ-Scores:")
print(z_scores)

import pandas as pd

# Two Series sharing the same default index
series1 = pd.Series([1, 2, 3, 4, 5])
series2 = pd.Series([10, 20, 30, 40, 50])

# The arithmetic methods are the named equivalents of +, -, * and /
result_add = series1.add(series2)
result_sub = series2.sub(series1)
result_mul = series1.mul(series2)
result_div = series2.div(series1)

print("Addition:")
print(result_add)
print("\nSubtraction:")
print(result_sub)
print("\nMultiplication:")
print(result_mul)
print("\nDivision:")
print(result_div)

import pandas as pd

# Two Series aligned on the same default index
series1 = pd.Series([1, 2, 3, 4, 5])
series2 = pd.Series([10, 20, 30, 40, 50])

# Element-wise arithmetic between the two Series
result_add = series1 + series2
result_sub = series2 - series1
result_mul = series1 * series2
result_div = series2 / series1

# Print each result under its heading
for heading, outcome in (
    ("Addition:", result_add),
    ("\nSubtraction:", result_sub),
    ("\nMultiplication:", result_mul),
    ("\nDivision:", result_div),
):
    print(heading)
    print(outcome)

import pandas as pd
import numpy as np


def custom_function(x):
    """Return x multiplied by 2."""
    doubled = x * 2
    return doubled


# Creating a Series of perfect squares
series = pd.Series([1, 4, 9, 16, 25])

# NumPy functions can be applied to a whole Series at once
result_sqrt = np.sqrt(series)

# apply() calls the custom function on every element
result_custom = series.apply(custom_function)

print("Square Root:")
print(result_sqrt)
print("\nCustom Function:")
print(result_custom)

import pandas as pd

# Creating a Series
series = pd.Series([45, 80, 30, 40, 50, 25, 65, 90, 85, 92])

# Build a boolean mask, then keep only the entries where it is True
above_fifty = series > 50
filtered_series = series[above_fifty]

print("\nConditional Filtering:")
print(filtered_series)

import pandas as pd

# Three named Series that will become the columns of a DataFrame
series1 = pd.Series([178, 180, 165, 156, 189], name='Height')
series2 = pd.Series([80, 90, 70, 60, 85], name='Weight')
series3 = pd.Series([10.1, 20.2, 30.3, 40.4, 50.5], name='BMI')

# Show each Series on its own
for single_series in (series1, series2, series3):
    print(single_series)

# Place the Series side by side; each Series name becomes a column label
df = pd.concat([series1, series2, series3], axis=1)

# Display the DataFrame
print(df)

import pandas as pd

# Height (in cm) and weight (in kg) for five people
height = pd.Series([160, 175, 180, 170, 165], name='Height (cm)')
weight = pd.Series([60, 75, 80, 70, 68], name='Weight (kg)')

# BMI = weight in kg / (height in meters)^2, so convert cm to m first
height_meters = height / 100
bmi = weight / (height_meters ** 2)

# Round to whole numbers and label the Series
bmi_series = bmi.round().rename('BMI')

# Combine height, weight, and BMI column-wise into a DataFrame
columns_in_order = [height, weight, bmi_series]
df = pd.concat(columns_in_order, axis=1)

# Display the DataFrame
print(df)

import pandas as pd

# Create a Series from the Height values
Height = [10, 20, 30, 40, 50]
series = pd.Series(Height, name='Height')

# Calculate mean, median, and (sample) standard deviation
mean_value = series.mean()
median_value = series.median()
std_deviation = round(series.std())

# Create a DataFrame to store the results, one row per metric
result_df = pd.DataFrame({
    'Metric': ['Mean', 'Median', 'Standard Deviation'],
    'Value': [mean_value, median_value, std_deviation]
})

# Display the DataFrame
print(result_df)

import pandas as pd

# Each key is a column label and each list holds that column's values
data = dict(
    Name=['Bhaskar', 'Gopinath', 'Senthil', 'Venkat'],
    Desg=['DL', 'TL', 'DL', 'GM'],
    City=['Chennai', 'Bangalore', 'Chennai', 'Delhi'],
)

# Creating a DataFrame from the dictionary
df = pd.DataFrame(data)

# Displaying the DataFrame
print(df)

import pandas as pd

# Every inner list is one row: name, age, occupation
data_list = [
    ['Alice', 25, 'Engineer'],
    ['Bob', 30, 'Designer'],
    ['Charlie', 22, 'Data Analyst'],
    ['David', 35, 'Manager'],
]

# Column labels are supplied separately because the rows carry no names
column_labels = ['Name', 'Age', 'Occupation']
df = pd.DataFrame(data_list, columns=column_labels)

# Display the DataFrame
print(df)

import pandas as pd

# Every tuple is one row: name, age, occupation
data_tuples = [
    ('Alice', 25, 'Engineer'),
    ('Bob', 30, 'Designer'),
    ('Charlie', 22, 'Data Analyst'),
    ('David', 35, 'Manager'),
]

# Column labels are supplied separately because the rows carry no names
column_labels = ['Name', 'Age', 'Occupation']
df = pd.DataFrame(data_tuples, columns=column_labels)

# Display the DataFrame
print(df)

import pandas as pd
import numpy as np

# Each array holds one column of data
array_list = [
    np.array([1, 2, 3, 4]),
    np.array(['Alice', 'Bob', 'Charlie', 'David']),
    np.array([25, 30, 22, 35]),
]

# Pair every column label with its array, in order, and build the DataFrame
column_labels = ('ID', 'Name', 'Age')
df = pd.DataFrame(dict(zip(column_labels, array_list)))

# Display the DataFrame
print(df)

import pandas as pd

# Three named Series that become the DataFrame columns
series1 = pd.Series([178, 180, 165, 156, 189], name='Height')
series2 = pd.Series([80, 90, 70, 60, 85], name='Weight')
series3 = pd.Series([10.1, 20.2, 30.3, 40.4, 50.5], name='BMI')

# Create a DataFrame from the Series
column_map = {'Height': series1, 'Weight': series2, 'BMI': series3}
df = pd.DataFrame(column_map)

# Display the DataFrame
print(df)

# Descriptive analysis: summary statistics for every numeric column
summary = df.describe()
print(summary)

import pandas as pd

data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston']
}

df = pd.DataFrame(data)
print(df)  # Display DF with all columns

# Column drop: drop() returns a new DataFrame, so the result is reassigned
df = df.drop(columns='City')
print(df)  # Display DF without city column

# Row drop: remove the row whose index label is 1
df = df.drop(index=1)
print(df)  # Display DF without Bob data

import pandas as pd

# Create a sample DataFrame
data = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 22],
    City=['New York', 'San Francisco', 'Los Angeles'],
)
df = pd.DataFrame(data)

print(df)

# del removes the 'City' column from df itself (no new object is returned)
del df['City']

print(df)

Difference between del and drop

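drop() - is a DataFrame method; it returns a new object unless inplace=True is passed
del - is a Python statement; it modifies the DataFrame in place

drop() - operates on both columns and rows
del - operates only on columns

drop() - can remove multiple items at a time
del - removes one item at a time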

import pandas as pd

# Creating a sample DataFrame
data = {'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
        'Age': [25, 30, 35, 40, 45]}

df = pd.DataFrame(data)

# Display the first 3 rows
first_rows = df.head(3)
print("First 3 records: \n", first_rows)

# Display the last 2 rows (the label now matches the number of rows shown)
last_rows = df.tail(2)
print("Last 2 records: \n", last_rows)

# Get the shape of the DataFrame
print("Shape of my dataframe: ", df.shape)
# Output: (5, 2) (5 rows, 2 columns)

import pandas as pd

# Creating a sample DataFrame
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[25, 30, 35, 40, 45],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Miami'],
)
df = pd.DataFrame(data)

# A single label in square brackets returns one column as a Series
name_column = df['Name']
print("Name Column (Square Brackets):")
print(name_column)

# A list of labels in square brackets returns a DataFrame with those columns
wanted_columns = ['Name', 'Age']
name_age_columns = df[wanted_columns]
print("Name and age Column (Square Brackets):")
print(name_age_columns)

# Dot notation also returns a single column as a Series
age_column = df.Age
print("\nAge Column (Dot Notation):")
print(age_column)

# Slicing in DataFrame
import pandas as pd

# Creating a sample DataFrame
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[25, 30, 35, 40, 45],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston', 'Miami'],
)

df = pd.DataFrame(data)

# Selecting rows with loc[] and a boolean condition: keep every row whose
# City is Chicago or whose Age is at least 30 (rows 1 to 4 for this data)
in_chicago = df.City == "Chicago"
thirty_or_older = df.Age >= 30
sliced_rows = df.loc[in_chicago | thirty_or_older]

# Slicing rows using iloc[] (based on integer positions)
sliced_rows_by_index = df.iloc[1:3]  # Positions 1 and 2 (3 is excluded)

# Displaying the sliced rows
print("Sliced Rows (loc[]):")
print(sliced_rows)

print("\nSliced Rows (iloc[]):")
print(sliced_rows_by_index)

import pandas as pd

# Creating a sample DataFrame
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Age=[30, 25, 35, 40, 30],
    Salary=[60000, 50000, 75000, 80000, 55000],
)
df = pd.DataFrame(data)

# Ascending is the default sort order
sorted_df = df.sort_values('Age')
print(sorted_df)

# Passing ascending=False reverses the order (largest Age first)
sorted_df = df.sort_values('Age', ascending=False)
print(sorted_df)

import pandas as pd

# Raw string so the backslashes in the Windows path are taken literally
user_csv_path = r'C:\Users\ICTAcademy\Desktop\FDP\PMIST FDP\Python Script\User Data.csv'
df = pd.read_csv(user_csv_path)
print(df)


import pandas as pd

# Raw string so the backslashes in the Windows path are taken literally
user_excel_path = r'C:\Users\ICTAcademy\Desktop\FDP\PMIST FDP\Python Script\User Data.xlsx'
df = pd.read_excel(user_excel_path, sheet_name="Sheet1")
print(df)

import pandas as p
import numpy as py

# The same CSV file is read, extended with new columns, and written back
student_csv = r'C:\Users\ICTAcademy\Desktop\FDP\PMIST FDP\Python Script\StudentData.csv'
df = p.read_csv(student_csv)
print(df)

# Sum of the three subject columns, reused by every derived column
subject_total = df['Sub1'] + df['Sub2'] + df['Sub3']

df["Total"] = subject_total
print(df)
df["Average"] = subject_total / 3
print(df)
df["percentage"] = round((subject_total / 300) * 100)
print(df)

# index=False keeps the row index out of the saved file
df.to_csv(student_csv, index=False)

import pandas as p
import numpy as py

exist_file = r'C:\Users\ICTAcademy\Desktop\FDP\PMIST FDP\Python Script\StudentData.csv'
df = p.read_csv(exist_file)
print(df)

# New column: its label and one value per row
Sub4_name = 'Sub4'
Sub4_values = [60, 80, 50, 60, 45, 85, 95, 75, 95, 75]

# insert() places the new column at position 4 (0-based) and changes df in place
df.insert(loc=4, column=Sub4_name, value=Sub4_values)
print(df)

# Write the extended table back to the same file, without the row index
df.to_csv(exist_file, index=False)

# Checking missing values
import pandas as p
import numpy as py

exist_file = r'C:\Users\ICTAcademy\Desktop\FDP\PMIST FDP\Python Script\StudentData.csv'
df = p.read_csv(exist_file)

# 1. Check for missing values: True marks a missing cell
missing_mask = df.isnull()
print("\n1. Checking for Missing Values:")
print(missing_mask)

# 2. Count missing values in each column (True counts as 1 when summed)
print("\n2. Counting Missing Values in Each Column:")
print(missing_mask.sum())

# 3. Remove rows with missing values; df itself is left unchanged
df_dropped = df.dropna()
print("\n3. DataFrame after Removing Rows with Missing Values:")
print(df_dropped)

 

# 3. Remove rows with missing values
df_dropped = df.dropna()
print("\n3. DataFrame after Removing Rows with Missing Values:")
print(df_dropped)

# 4. Fill missing values with a specific value (e.g., mean of the column)
# numeric_only=True restricts the mean to numeric columns; without it,
# recent pandas versions raise an error when the frame has text columns.
mean_value = df.mean(numeric_only=True).round()
print("Mean Value is: ", mean_value)
# fillna() with a Series fills each column with the value under its label
df_filled = df.fillna(mean_value)
print("\n4. DataFrame after Filling Missing Values with Mean:")
print(df_filled)

# 5. Replace missing values with a custom value chosen per column
df_custom_filled = df.fillna({'Sub1': 60, 'Sub2': 65, 'Sub3': 70, 'Sub4': 75})
print("\n5. DataFrame after Custom Filling of Missing Values:")
print(df_custom_filled)

 

import pandas as pd
# Package support for odbc connections
import pyodbc as po

# Connection string for SQL Server.
# The server name contains a backslash, so that part is a raw string;
# in a normal string "\S" is an invalid escape sequence.
connection_string = (
    'Driver={SQL Server};'
    r'Server=LAPTOP-SMO6VN72\SQLEXPRESS;'
    'Database=AdventureWorksDW2020;'
)

# SQL query
sql_query = 'select ProductKey,EnglishProductName from dbo.DimProduct;'

# Establish a connection to SQL Server
connection = po.connect(connection_string)

try:
    # Execute the query and read data into a DataFrame
    df = pd.read_sql(sql_query, connection)
finally:
    # Always release the connection, even if the query fails
    connection.close()

print(df.head(10))

 

import pandas as pd  # import the pandas module

# a pandas Series built from a python list of numbers
data = pd.Series([60, 50, 65, 20, 45, 25, 65, 75, 25, 30, 40])

# line plot (the default plot kind) on a figure 20 inches wide and 10 inches high
data.plot.line(figsize=(20, 10))

 

 

# import the pandas module
import pandas as pd

# Creating a pandas dataframe
df = pd.DataFrame({'names': ['Bhaskar', 'Venkat', 'Sanjith', 'Vash'],
                   'Credit Points': [10000, 45000, 30000, 20000]})

# creates a bar graph on a figure 10 inches wide and 5 inches high,
# with the x tick labels rotated by 90 degrees
df.plot(kind='bar', x='names', y='Credit Points', rot=90, figsize=(10, 5))

 

# import the pandas module
import pandas as pd

# Creating a pandas dataframe whose index supplies the wedge labels
df = pd.DataFrame({'value': [3.330, 4.87, 5.97]},
                  index=['A', 'B', 'C'])

# pie chart of the 'value' column on a 5 x 5 inch figure
df.plot(kind='pie', y='value', figsize=(5, 5))

 

import pandas as pd
import matplotlib.pyplot as plt

# Sample data (replace with your own DataFrame)
data = pd.DataFrame({
    'Year': [2010, 2011, 2012, 2013, 2014, 2015],
    'Revenue': [1050, 1210, 1310, 1210, 1600, 1400]
})

# Create a line plot with square markers and a solid line
data.plot(x='Year', y='Revenue', marker='s', linestyle='-')

# Decorate the current axes
plt.title('Revenue Over Time')
plt.xlabel('Year')
plt.ylabel('Revenue (INR)')
plt.grid(True)

plt.show()

 

# Sample data (replace with your own DataFrame)
data = pd.DataFrame({
    'Category': ['A', 'B', 'C', 'D'],
    'Count': [30, 45, 60, 25]
})

# Create a bar plot
data.plot.bar(x='Category', y='Count', color='skyblue')

# Decorate the current axes; the tick labels are rotated by 90 degrees
plt.title('Category Counts')
plt.xlabel('Category')
plt.ylabel('Count')
plt.xticks(rotation=90)

plt.show()

 

 

import matplotlib.pyplot as plt
import numpy as np

# make data: x positions 0.5, 1.5, ..., 7.5 with one height each
x = 0.5 + np.arange(8)
y = [4.8, 5.5, 3.5, 4.6, 6.5, 6.6, 2.6, 3.0]

# plot
fig, ax = plt.subplots()

ax.stem(x, y)

# Both axes run from 0 to 8 with ticks at 1..7
tick_positions = np.arange(1, 8)
ax.set(xlim=(0, 8), xticks=tick_positions,
       ylim=(0, 8), yticks=tick_positions)

plt.show()

 

import matplotlib.pyplot as plt
import numpy as np

# make data: three series sampled at x = 0, 2, 4, 6, 8
x = np.arange(0, 10, 2)
ay = [1, 1.25, 2, 2.75, 3]
by = [1, 1, 1, 1, 1]
cy = [2, 1, 2, 1, 2]
# one row per series
y = np.vstack([ay, by, cy])

# plot
fig, ax = plt.subplots()

ax.stackplot(x, y)

# Both axes run from 0 to 8 with ticks at 1..7
tick_positions = np.arange(1, 8)
ax.set(xlim=(0, 8), xticks=tick_positions,
       ylim=(0, 8), yticks=tick_positions)

plt.show()

 

import pandas as pd
import matplotlib.pyplot as plt

# Sample of 48 student heights in centimeters
heights = [160, 165, 170, 155, 175, 180, 162, 168, 172, 163, 166, 169, 176, 161, 164,
           158, 178, 173, 157, 171, 159, 167, 182, 174, 181, 177, 150, 183, 152, 179,
           185, 154, 187, 151, 184, 156, 153, 186, 189, 148, 190, 147, 188, 149, 146,
           191, 145, 144]

# Create a pandas DataFrame from the heights list
df = pd.DataFrame({'Heights (cm)': heights})

# Plot a histogram with 10 bins and outlined bars
height_column = df['Heights (cm)']
plt.hist(height_column, bins=10, edgecolor='black')
plt.title('Histogram of Student Heights')
plt.xlabel('Height (cm)')
plt.ylabel('Frequency')

# Show the plots
plt.show()





# Plot Box Plots
import pandas as p
import matplotlib.pyplot as plt
import seaborn as sns

df = p.read_csv('E:/Python Script/BMI_Chart.csv')

# One box per column of the DataFrame, on an 8 x 4 inch figure
plt.figure(figsize=(8, 4))
sns.boxplot(data=df, palette='Set2')
plt.title('Box Plot of Columns Height, Weight, and BMI')

# Show the plot (needed when running as a script, as in the other examples)
plt.show()

 

# Create a line plot using relplot
import seaborn as sns
import matplotlib.pyplot as plt

# Sample data: the plot reads the total_bill, tip and smoker columns,
# so the "tips" dataset has to be loaded before plotting
data = sns.load_dataset("tips")

sns.relplot(x="total_bill", y="tip", kind="line", style='smoker', data=data)

# Set plot labels and title
plt.xlabel("Total Bill ($)")
plt.ylabel("Tip ($)")
plt.title("Line Plot of Total Bill vs. Tip")

# Show the plot
plt.show()

 

import seaborn as sns
import matplotlib.pyplot as plt

# Sample data
data = sns.load_dataset("tips")

# lmplot draws a scatter plot together with a fitted regression line
sns.lmplot(data=data, x="total_bill", y="tip")

# Set plot labels and title
plt.xlabel("Total Bill ($)")
plt.ylabel("Tip ($)")
plt.title("Scatter Plot with Regression Line")

# Show the plot
plt.show()

 

import seaborn as sns
import matplotlib.pyplot as plt

# Sample data
data = sns.load_dataset("tips")

# Create a stripplot: one point per bill, grouped by day
sns.stripplot(data=data, x="day", y="total_bill")

# Set plot labels and title
plt.xlabel("Day of the Week")
plt.ylabel("Total Bill ($)")
plt.title("Strip Plot of Total Bill by Day")

# Show the plot
plt.show()

 

 

import seaborn as sns
import matplotlib.pyplot as plt

# Sample data
data = sns.load_dataset("tips")

# Create a histogram using histplot on an 8 x 4 inch figure
plt.figure(figsize=(8, 4))
sns.histplot(x="total_bill", data=data, color="skyblue")

# Labels and title for the current axes
plt.xlabel("Total Bill ($)")
plt.ylabel("Frequency")
plt.title("Histogram without KDE")

plt.show()

 

 

import seaborn as sns
import matplotlib.pyplot as plt

# Sample data
data = sns.load_dataset("tips")

# Create a KDE plot using kdeplot; fill=True shades the area under the curve
plt.figure(figsize=(8, 4))
sns.kdeplot(x="total_bill", data=data, fill=True, color="salmon")

# Labels and title for the current axes
plt.xlabel("Total Bill ($)")
plt.ylabel("Density")
plt.title("KDE Plot")

plt.show()

 

 

import seaborn as sns
import matplotlib.pyplot as plt

# Sample data
data = sns.load_dataset("tips")

# Create a rug plot using rugplot on a wide, short (8 x 1 inch) figure
plt.figure(figsize=(8, 1))
sns.rugplot(x="total_bill", data=data, height=0.5, color="purple")

# Label and title for the current axes
plt.xlabel("Total Bill ($)")
plt.title("Rug Plot")

plt.show()

 

 

import seaborn as sns
import matplotlib.pyplot as plt

# Sample data
data = sns.load_dataset("tips")

# Create a swarmplot of the total bill for each day
plt.figure(figsize=(8, 4))
sns.swarmplot(data=data, x="day", y="total_bill", palette="Set2")

# The x axis shows the "day" column, so it is labelled accordingly
plt.xlabel("Day of the Week")
plt.ylabel("Total Bill ($)")
plt.title("Swarm Plot of Total Bill by Day")
plt.show()


import seaborn as sns
import matplotlib.pyplot as plt

# Sample data
data = sns.load_dataset("tips")

# Create a violin plot of the total bill for each day
plt.figure(figsize=(8, 4))
sns.violinplot(x="day", y="total_bill", data=data, palette="Set2")

# Labels and title for the current axes
plt.xlabel("Day of the Week")
plt.ylabel("Total Bill ($)")
plt.title("Violin Plot of Total Bill by Day")

plt.show()

 

import seaborn as sns
import matplotlib.pyplot as plt

# Sample data
data = sns.load_dataset("tips")

# Create a jointplot: a scatter plot with a distribution plot on each margin
sns.jointplot(x="total_bill", y="tip", data=data, kind="scatter")

# Title for the whole figure
plt.suptitle("Jointplot of Total Bill vs. Tip")

plt.show()

PYTHON - Reading and Writing with the with Statement - JSON - BINARY

 Reading and Writing with with Statement:

You can use the with statement to automatically close the file when you're done with it. It's a recommended approach as it ensures that the file is properly closed even if an exception occurs.

Sample program using the with statement:

# Writing to a file using the with statement;
# the file is closed automatically when the block ends
with open("sample.txt", "w") as writer:
    writer.write("Hello, World!")

# Reading from a file using the with statement
with open("sample.txt", "r") as reader:
    content = reader.read()
    print(content)


Handling Binary Files:

You can open files in binary mode by specifying "b" in the mode string ("rb" for reading binary, "wb" for writing binary, etc.). This is useful for working with non-text files like images or executables.

Sample program for binary file handling:

# Reading a binary file: "rb" returns the raw bytes without text decoding
with open("image.jpg", "rb") as source:
    binary_data = source.read()

# Writing binary data to a file: "wb" writes the bytes back unchanged
with open("copy_image.jpg", "wb") as target:
    target.write(binary_data)

These are the fundamental operations for file handling in Python. Remember to handle exceptions, check if files exist before working with them, and close files properly to avoid data corruption and resource leaks.


Advanced File Handling Functions:
In addition to the basic file handling operations mentioned earlier, Python provides more advanced file handling functions and techniques that can help you work with files more efficiently and handle various scenarios. Here are some advanced file handling functions.

Reading and Writing CSV Files:


You can use the csv module to easily read and write CSV (Comma-Separated Values) files.

Sample program to read and write CSV files:

import csv

# The same file is written and then read back
csv_path = "Sample_csv.csv"

# Header row followed by one row per person
rows = [
    ["Name", "Age"],
    ["Bhaskar", 34],
    ["Vinoth", 34],
    ["Senthil", 37],
    ["Venkatesh", 34],
]

# Writing data to a CSV file
# newline="" lets the csv module control line endings itself
with open(csv_path, "w", newline="") as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerows(rows)

# Reading data from the CSV file that was just written.
# Every value comes back as a string, e.g. "34" rather than 34.
with open(csv_path, "r", newline="") as csvfile:
    csv_reader = csv.reader(csvfile)
    read_rows = list(csv_reader)

for row in read_rows:
    print(row)


Working with JSON Files:

The json module allows you to work with JSON (JavaScript Object Notation) files.

Sample program to read and write JSON files:

import json

# Writing data to a JSON file: serialize to text, then write the text
data = {"name": "Alice", "age": 30}
with open("data.json", "w") as jsonfile:
    jsonfile.write(json.dumps(data))

# Reading data from a JSON file: read the text, then parse it
with open("data.json", "r") as jsonfile:
    loaded_data = json.loads(jsonfile.read())
    print(loaded_data)

PYTHON - FILE HANDLING

 File Handling in python


File handling in Python allows you to work with files on your computer's filesystem. You can create, read, write, and manipulate files using various methods and functions provided by Python's built-in modules. Here, I'll explain the basic concepts of file handling in Python and provide a sample program for each operation.


Opening and Closing Files:
To work with a file, you must first open it using the open() function. It takes two arguments: the filename and the mode in which you want to open the file (e.g., read, write, append, etc.). After working with the file, you should close it using the close() method to free up system resources.

Sample program to open and close a file:

# Opening a file in write mode
file = open("sample.txt", "w")

try:
    # Writing data to the file
    file.write("Hello, World!")
finally:
    # Closing the file; the finally block runs even if write() raises,
    # so the file handle is never left open
    file.close()


Reading from a File:

You can read the contents of a file using the read() method or by iterating over the file line by line using a loop.

Sample program to read from a file:

# Opening a file in read mode
file = open("sample.txt", "r")

try:
    # Reading the entire content
    content = file.read()
    print(content)
finally:
    # Closing the file; the finally block runs even if read() raises,
    # so the file handle is never left open
    file.close()


Writing to a File:
Open it in write mode ("w") to write data to a file. You can use the write() method to add content to the file. Be cautious, as opening a file in write mode will overwrite its existing content.

Sample program to write to a file:

# Opening a file in write mode (any existing content is discarded)
file = open("sample.txt", "w")

try:
    # Writing data to the file
    file.write("Hello, World!")
finally:
    # Closing the file; the finally block runs even if write() raises,
    # so the file handle is never left open
    file.close()

Appending to a File:
If you want to add content to a file without overwriting the existing content, open the file in append mode ("a").

Sample program to append to a file:

# Opening a file in append mode (existing content is kept)
file = open("sample.txt", "a")

try:
    # Appending data to the end of the file
    file.write("\nThis is a new line.")
finally:
    # Closing the file; the finally block runs even if write() raises,
    # so the file handle is never left open
    file.close()

PYTHON - SCOPE

 Python Scope


In Python, "scope" refers to the region or context in which a variable is defined and can be accessed. Python has different levels of scope, including global scope and local scope, and the scope of a variable determines where it can be used or accessed in your code. Understanding scope is crucial for writing clean, maintainable, and bug-free Python programs. Here, I'll explain in detail about Python scope with sample code.

1. Local Scope:

Variables defined within a function have local scope. They are only accessible within that function.
A new local scope is created every time a function is called, and it is destroyed when the function exits.

def my_function():
    """Assign a local variable and print its value."""
    local_variable = 42  # defined inside the function, so it is local to it
    print(local_variable)

my_function()  # Calls the function
print(local_variable)  # Raises a NameError because local_variable is not defined here

2. Enclosing (Non-Local) Scope:

In nested functions, variables can be in an "enclosing" scope, also known as a "non-local" scope.
Variables in the enclosing scope are not global but are accessible within the nested functions.

def outer_function():
    """Define a variable, then call a nested function that prints it."""
    outer_variable = 10  # local to outer_function, enclosing for inner_function

    def inner_function():
        """Print outer_variable, which is read from the enclosing function."""
        print(outer_variable)  # Accesses the outer_variable from the enclosing scope

    inner_function()

outer_function()

3. Global Scope:

Variables defined outside of any function have global scope. They can be accessed from any part of the program.

global_variable = 100

def my_function():
    """Print the module-level global_variable; reading it needs no declaration."""
    print(global_variable)  # Accesses the global_variable

my_function()

4. Built-in Scope:

Python has a built-in scope that contains functions and objects provided by Python itself.
You can access built-in functions and objects like print(), len(), str, and int without importing them.

print(len("Hello, World!"))  # Accesses the len() function from the built-in scope

5. Modifying Variables in an Enclosing Scope:

To modify a variable in an enclosing scope from within a nested function, you can use the nonlocal keyword.

def outer_function():
    """Let a nested function add 5 to outer_variable, then print the result."""
    outer_variable = 10

    def inner_function():
        """Add 5 to the outer_variable of the enclosing function."""
        nonlocal outer_variable  # Use nonlocal to modify outer_variable
        outer_variable += 5  # rebinds the enclosing variable, not a new local

    inner_function()
    print(outer_variable)  # Prints 15

outer_function()

6. Global Variables:

You can declare a variable as global inside a function using the global keyword.
This allows you to modify the global variable within the function.

global_variable = 100

def modify_global():
    """Add 10 to the module-level global_variable."""
    global global_variable  # Declare global_variable as global
    global_variable += 10  # rebinds the module-level name, not a new local

modify_global()
print(global_variable)  # Prints 110

Scope determines where a variable is visible and accessible. Python follows the LEGB (Local, Enclosing, Global, Built-in) rule for variable name resolution, which means it first looks for a variable in the local scope, then in any enclosing scopes, then in the global scope, and finally in the built-in scope.