0

I have more than 100,000,000 page URLs. How can I make the QuerySet dynamic, so that each sitemap class covers 10,000 unique URLs without my having to hard-code the slice boundaries by hand in 10,000 separate classes?


# sitemap.py account_

from django.contrib.sitemaps import Sitemap
from django.shortcuts import reverse
from appname.models import Page
import datetime

from appname.sitemaps import Page000001
from appname.sitemaps import Page000002

# Maps each sitemap section name to the Sitemap class that serves it.
# NOTE(review): Page000001 / Page000002 are also defined further down in this
# snippet; confirm whether the imported or the locally defined classes are meant.
ps_dict_01 = {
    "ps_file_000001": Page000001,
    "ps_file_000002": Page000002,
}

class Page000001(Sitemap):
    """Sitemap section for the first 10,000 ``Page`` rows, ordered by primary key."""

    # NOTE: evaluated once, when this class body is executed, so every entry
    # reports that same timestamp rather than the time of the request.
    lastmod = datetime.datetime.now()
    changefreq = 'hourly'
    priority = 1.0
    protocol = 'http'

    def items(self):
        """Return rows 0-9,999 of ``Page``.

        An explicit ordering is applied before slicing: without one the
        database may return rows in any order, so neighbouring slices could
        overlap or skip rows.
        """
        return Page.objects.order_by('pk')[:10000]

class Page000002(Sitemap):
    """Sitemap section for ``Page`` rows 10,000-19,999, ordered by primary key."""

    # NOTE: evaluated once, when this class body is executed, so every entry
    # reports that same timestamp rather than the time of the request.
    lastmod = datetime.datetime.now()
    changefreq = 'hourly'
    priority = 1.0
    protocol = 'http'

    def items(self):
        """Return rows 10,000-19,999 of ``Page``.

        An explicit ordering is applied before slicing: without one the
        database may return rows in any order, so neighbouring slices could
        overlap or skip rows.
        """
        return Page.objects.order_by('pk')[10000:20000]

1 Answer

0

You should be able to split the queryset into chunks and generate the sitemaps.

As an example, here is one way of chunking a queryset, taken from this gist:

# utils.py

def queryset_iterator(queryset, chunk_size=1000):
    """
    Iterate over a Django queryset in primary-key order, one row at a time.

    Rows are fetched in batches of at most ``chunk_size`` (default: 1000), so
    only one batch is requested from the database at a time instead of the
    whole result set.

    Any ordering already present on ``queryset`` is replaced by ordering on
    ``pk``, so ordered querysets are not supported. The scan starts from
    ``pk > 0``, which assumes positive numeric primary keys.

    :param queryset: the queryset to walk.
    :param chunk_size: maximum number of rows requested per database query.
    :return: a generator yielding the individual rows (not batches).
    """
    import gc

    # ``first()`` returns None on an empty queryset, so no exception handling
    # is needed to detect "nothing to iterate".
    last_row = queryset.order_by('-pk').first()
    if last_row is None:
        return
    last_pk = last_row.pk

    pk = 0
    queryset = queryset.order_by('pk')
    while pk < last_pk:
        fetched = False
        for row in queryset.filter(pk__gt=pk)[:chunk_size]:
            fetched = True
            pk = row.pk
            yield row
        if not fetched:
            # No row above ``pk`` came back (e.g. the tail was deleted while
            # iterating, or chunk_size is 0); stop instead of looping forever.
            break
        # Release the batch that was just consumed before fetching the next.
        gc.collect()

You should be able to apply the same approach when generating the sitemaps:

# sitemaps.py

from django.contrib.sitemaps import Sitemap

from appname.models import Page

from .utils import queryset_iterator


def generate_sitemaps(chunk_size=10000):
    """Build one ``PageSitemap`` per block of ``chunk_size`` pages.

    The pages are ordered by primary key so that the blocks are disjoint,
    then cut into consecutive slices. Each slice is handed to ``PageSitemap``
    as a sliced queryset rather than a fetched list, so the only query this
    function itself issues is the ``count()``.

    :param chunk_size: number of pages per sitemap; must be a positive
        integer (``range`` raises ``ValueError`` for a step of 0).
    :return: dict mapping ``"page_1"``, ``"page_2"``, ... to sitemap instances.
    """
    ordered_pages = Page.objects.order_by("pk")
    total = ordered_pages.count()
    return {
        f"page_{number}": PageSitemap(
            items=ordered_pages[start:start + chunk_size]
        )
        for number, start in enumerate(range(0, total, chunk_size), start=1)
    }


class PageSitemap(Sitemap):
    """Sitemap of ``Page`` objects, optionally limited to a preset collection.

    When constructed with ``items``, the sitemap serves exactly that
    collection; otherwise it serves every ``Page``.
    """

    changefreq = "never"
    priority = 0.5

    def __init__(self, items=None):
        """Remember the optional preset collection.

        :param items: a queryset or sequence of pages to serve, or ``None``
            to serve all pages.
        """
        # Stored under its own name: assigning to ``self.items`` would replace
        # the ``items()`` method with a non-callable on this instance.
        self._preset_items = items
        super().__init__()

    def items(self):
        """Return the preset collection if one was given, else all pages."""
        # Compare against None rather than testing truthiness, which would
        # force a queryset to be evaluated just to answer the question.
        if self._preset_items is not None:
            return self._preset_items
        return Page.objects.all()
# urls

from django.contrib.sitemaps.views import index, sitemap
from django.urls import path
from django.views.decorators.cache import cache_page

from .sitemaps import generate_sitemaps


# Build the mapping once so that both routes share the same sitemap instances.
page_sitemaps = generate_sitemaps()

# Same cache settings for both views: one hour, in the 'pages' cache.
cache_sitemap_view = cache_page(timeout=60 * 60, cache='pages')

urlpatterns = [
    # Sitemap index: lists one <sitemap> entry per section instead of putting
    # every URL from every section into a single document.
    path(
        'sitemap.xml',
        cache_sitemap_view(index),
        {'sitemaps': page_sitemaps, 'sitemap_url_name': 'sitemaps'},
    ),
    # One document per section; <section> is a key of ``page_sitemaps``.
    path(
        'sitemap-<section>.xml',
        cache_sitemap_view(sitemap),
        {'sitemaps': page_sitemaps},
        name='sitemaps',
    ),
]
markwalker_
  • 12,078
  • 7
  • 62
  • 99