# Bikarhêner:Balyozxane/ku-latn.py
#
# Ji Wîkîferhengê
import pywikibot
import re

def get_page_titles_from_file(filename):
    """Read page titles from a text file, one title per line.

    Leading and trailing whitespace is stripped from every line. Lines that
    are empty after stripping are skipped, since an empty string is not a
    usable page title for the lookups done later in this script.

    Args:
        filename: Path of the UTF-8 encoded text file to read.

    Returns:
        A list of the non-empty, stripped lines in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # 'utf-8-sig' decodes plain UTF-8 as well, but additionally drops a
    # leading byte-order mark so it does not end up glued to the first title.
    with open(filename, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [title for title in stripped_lines if title]

def extract_page_title(text):
    """Collect the first-argument text of every {{bnr|...}} / {{bnr2|...}}.

    Args:
        text: Wikitext to scan.

    Returns:
        A list with one string per template occurrence, in order of
        appearance: everything between the first ``|`` and the closing
        ``}}``. The list is empty when no such template is found.
    """
    bnr_template = re.compile(r"\{\{bnr(?:2)?\|(.*?)\}\}")
    return [found.group(1) for found in bnr_template.finditer(text)]

def is_page_in_categories(page_title, categories):
    """Find the first of *categories* that the given page carries.

    The page is looked up on the "ku" wiktionary site and its category
    titles (without namespace prefix) are compared against *categories*
    in the order given.

    Args:
        page_title: Title of the page whose categories are inspected.
        categories: Category names to look for, in priority order.

    Returns:
        The first matching category name with the " bi kurdî" suffix
        removed, or None when none of them matches.
    """
    target = pywikibot.Page(pywikibot.Site("ku", "wiktionary"), page_title)
    present = [cat.title(with_ns=False) for cat in target.categories()]

    # First wanted category (in caller-supplied order) that the page has.
    hit = next((name for name in categories if name in present), None)
    if hit is None:
        return None
    return hit.replace(" bi kurdî", "")

def update_page_content(page_title, extracted_titles, cleared_category):
    """Replace a page's text with a generated ku-Latn entry and save it.

    The generated wikitext consists of a language heading, a heading named
    after *cleared_category*, a headword template (the lower-cased category
    name with ``ku`` and ``sc=Arab``; ``z=-`` is added for "Navdêr"), and a
    definition line wrapping *extracted_titles* in ``{{ku-Latn|...}}``.

    Args:
        page_title: Title of the page on the "ku" wiktionary to overwrite.
        extracted_titles: String placed inside the ``{{ku-Latn|...}}``
            template (the main loop passes a single extracted title).
        cleared_category: Category name without the " bi kurdî" suffix.
    """
    target = pywikibot.Page(pywikibot.Site("ku", "wiktionary"), page_title)

    # Arguments of the headword template, in output order.
    template_args = [cleared_category.lower(), "ku"]
    if cleared_category == "Navdêr":
        template_args.append("z=-")
    template_args.append("sc=Arab")

    wikitext_lines = [
        "== {{ziman|ku}} ==",
        "",
        "=== " + cleared_category + " ===",
        "{{" + "|".join(template_args) + "}}",
        "# {{ku-Latn|" + extracted_titles + "}}",
    ]

    target.text = "\n".join(wikitext_lines)
    target.save("+{{[[Şablon:ku-Latn|ku-Latn]]}} (bi riya [[Bikarhêner:Balyozxane/ku-latn.py|ku-Latn.py]])")

def log_skipped_page(page_title):
    """Append *page_title* as one line to ``skipped_pages.txt``.

    The file is opened in append mode with UTF-8 encoding in the current
    working directory and is created if it does not exist yet.

    Args:
        page_title: Title of the page that was skipped.
    """
    with open('skipped_pages.txt', 'a', encoding='utf-8') as skipped_log:
        skipped_log.writelines([page_title + '\n'])

# Categories that mark a target page as a known part of speech / entry type.
# Checked in this order; the first match decides the heading of the new entry.
# Built once here rather than on every loop iteration.
categories_to_check = [
    "Lêker bi kurdî",
    "Navdêr bi kurdî",
    "Rengdêr bi kurdî",
    "Serenav bi kurdî",
    "Hoker bi kurdî",
    "Cînav bi kurdî",
    "Artîkel bi kurdî",
    "Baneşan bi kurdî",
    "Bazinedaçek bi kurdî",
    "Biwêj bi kurdî",
    "Daçek bi kurdî",
    "Girêdek bi kurdî",
    "Gotineke pêşiyan bi kurdî",
    "Hejmar bi kurdî",
    "Hevok bi kurdî",
    "Kurtenav bi kurdî",
    "Navgir bi kurdî",
    "Paşdaçek bi kurdî",
    "Paşgir bi kurdî",
    "Pêşdaçek bi kurdî",
    "Pêşgir bi kurdî",
    "Pirtik bi kurdî",
    "Reh bi kurdî",
    "Sembol bi kurdî",
    "Tîp bi kurdî",
]

# Step 1: Get list of page titles from file
pages = get_page_titles_from_file('mylist.txt')

# Step 2 and 3: Process each page
for page_title in pages:
    content = pywikibot.Page(pywikibot.Site('ku', 'wiktionary'), page_title).text
    extracted_titles = extract_page_title(content)

    # Nothing to do when the page has no {{bnr|...}} / {{bnr2|...}} template.
    if not extracted_titles:
        print(f"No page title extracted from {page_title}")
        log_skipped_page(page_title)
        continue

    # Only the first extracted title is used: its categories decide the
    # heading, and it becomes the argument of the generated template.
    cleared_category = is_page_in_categories(extracted_titles[0], categories_to_check)
    if not cleared_category:
        print(f"{page_title} is not in the specified categories")
        log_skipped_page(page_title)
        continue

    update_page_content(page_title, extracted_titles[0], cleared_category)
    print(f"Updated page content for {page_title}")

print("Finished processing all pages.")