Basically i want to drop some columns that i don't need. And i'm kind of stumped why this is not working
import os
import pandas
def summarise(indir, outfile):
os.chdir(indir)
filelist = ".txt"
dflist = []
colnames = ["DSP Code", "Report Date", "Initial Date", "End Date", "Transaction Type", "Sale Type",
"Distribution Channel", "Products Origin ID", "Product ID", "Artist", "Title", "Units Sold",
"Retail Price", "Dealer Price", "Additional Revenue", "Warner Share", "Entity to be billed",
"E retailer name", "E retailer Country", "End Consumer Country", "Price Code", "Currency Code"]
for filename in filelist:
print(filename)
df = pandas.read_csv('SYB_M_20171001_20171031.txt', header=None, encoding='utf-8', sep='\t', names=colnames,
skiprows=3)
df['data_revenue'] = df['Units Sold'] * df['Dealer Price'] # Multiplying Units with Dealer price = Revenue
df = df.sort_values(['End Consumer Country', 'Currency Code']) # Sorts the columns alphabetically
df.to_csv(outfile + r"\output.csv", index=None)
dflist.append(filename)
df.drop(columns='DSP Code')
summarise(r"O:\James Upson\Sound Track Your Brand Testing\SYB Test",
r"O:\James Upson\Sound Track Your Brand Testing\SYB Test Formatted")
I want to drop all the column titles you can see in colnames
excluding 'Units Sold', 'Dealer Price', 'End Consumer Country', 'Currency Code'
. I tried to remove one column using df.drop(columns='DSP Code')
but this doesn't seem to work.
Any help would be greatly appreciated :)