2025-09-15

The IRS has slowly been improving their treaty documents. As I blogged last week, the IRS added to their website the Treasury Department Technical Explanations for the 1971 Norway Treaty and the 1980 Norway Protocol. I had also previously blogged about errors and omissions in the IRS's version of the Sweden treaty, and how they corrected this by linking to the Treasury Department version of the Sweden treaty.
The IRS website is still missing Technical Explanations for the following countries:
[On 9/19/2025 the IRS also added the technical explanation to the 1994 Sweden treaty, which I hadn't realized was missing.]
* For Luxembourg, New Zealand, and Spain, the IRS website includes the Technical Explanations for the protocols, but not for the treaties themselves.
Below is a Python script that you can use to download the IRS treaty documents that are PDF files (about 170 files).
from pathlib import Path
import requests
from bs4 import BeautifulSoup
def get_country_names_and_country_links():
    """Scrape the IRS treaties A-to-Z index page.

    Returns:
        tuple[list[str], list[str]]: parallel lists of country names and the
        (site-relative) links to each country's treaty page.
    """
    # the link to the IRS treaties page
    url = 'https://www.irs.gov/businesses/international-businesses/united-states-income-tax-treaties-a-to-z'
    response = requests.get(url, timeout=20)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    country_names = []
    country_links = []
    # Find all <a> tags in the div in the articles
    for a in soup.select('article > div:nth-of-type(1) > div > div a'):
        # some <a> tags don't have hrefs; just skip them
        if 'href' not in a.attrs:
            continue
        # get the name of the country (from the text of the <a> tag)
        country_name = a.get_text(strip=True)
        # get the href (the link itself)
        country_link = a['href']
        # keep only anchors with non-empty text; add name and link in lockstep
        if country_name:
            country_names.append(country_name)
            country_links.append(country_link)
    # skip the last 3 links because they are not links to countries
    return country_names[:-3], country_links[:-3]
def save_pdf_files(url):
    """Download every PDF linked from one IRS country treaty page into ``save_dir``.

    Args:
        url: site-relative URL of the country's treaty page
            (e.g. ``/businesses/international-businesses/...``).

    Side effects:
        Writes PDF files under the module-level ``save_dir`` directory and
        prints progress / non-PDF links to stdout.
    """
    # the url is a relative url; make it an absolute url
    full_url = 'https://www.irs.gov' + url
    response = requests.get(full_url, timeout=20)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # find all PDF links in divs with the class 'field'
    for a in soup.select('div.field a[href]'):
        href = a['href']
        if href.lower().endswith('.pdf'):
            # BUG FIX: resolve relative hrefs against the ABSOLUTE page URL.
            # The original joined against the site-relative `url`, which
            # produced another relative URL that requests cannot fetch.
            pdf_url = requests.compat.urljoin(full_url, href)
            # Get the filename from the URL
            filename = pdf_url.split('/')[-1]
            # create the file path to save to
            filepath = save_dir / filename
            # if the file has not already been downloaded, download it
            if not filepath.exists():
                r = requests.get(pdf_url, timeout=20)
                r.raise_for_status()
                with open(filepath, 'wb') as f:
                    f.write(r.content)
                # BUG FIX: the original printed the literal '(unknown)'
                # instead of the downloaded file's name.
                print(f'Downloaded: {filename}')
        # each country page has non-PDF links to Adobe and to the Treasury
        # treaties page; ignore those two links
        elif href and href != 'https://get.adobe.com/reader/' and href != 'https://home.treasury.gov/policy-issues/tax-policy/treaties':
            # print links to the non-PDF files so that you know which files
            # are treaty documents but not PDFs
            print('============== not a PDF', href)
# Directory that will receive every downloaded PDF; created up front so the
# per-country downloads can write into it unconditionally.
save_dir = Path('pdf_downloads')
save_dir.mkdir(parents=True, exist_ok=True)

# Fetch the list of countries together with their treaty-page links.
country_names, country_links = get_country_names_and_country_links()

# Walk the countries in order, announcing each one before downloading
# the PDF files from its page.
for name, link in zip(country_names, country_links):
    print()
    print(name)
    save_pdf_files(link)