High-resolution images from the Rijksmuseum

Alec Jacobson

June 03, 2013

weblog/

Here's a PHP script to download and stitch together high-resolution images from the Rijksmuseum:
<?php

# http://www.php.net/manual/en/function.json-decode.php#107107
/**
 * Normalise a raw JSON payload so that json_decode() will accept it.
 *
 * A leading UTF-8 byte-order mark is removed, and input that is not valid
 * UTF-8 is treated as ISO-8859-1 and converted to UTF-8.
 *
 * @param string $input Raw response body.
 * @return string UTF-8 text without a leading BOM.
 */
function prepareJSON($input) {

    // Remove the UTF-8 BOM if present; json_decode() does not like it.
    // This is done on the raw bytes, before any re-encoding can alter them.
    if (substr($input, 0, 3) === "\xEF\xBB\xBF") {
        $input = substr($input, 3);
    }

    // ASCII and UTF-8 input is already valid UTF-8 and passes through
    // untouched. Anything else is assumed to be ISO-8859-1. Checking validity
    // explicitly avoids relying on encoding auto-detection, which can garble
    // input that was already UTF-8.
    if (!mb_check_encoding($input, 'UTF-8')) {
        $input = mb_convert_encoding($input, 'UTF-8', 'ISO-8859-1');
    }

    return $input;
}

// The page URL is the only command-line argument.
if(!isset($argv[1]))
{
  fwrite(STDERR, "Usage: php ".$argv[0]." <url>\n");
  exit(1);
}
$url = $argv[1];
$url = preg_replace("/^https/","http",$url);

// This section sets $id (the object number used to request tiles) and $final
// (the lowercase, filename-safe title used to name the output image).
echo "Getting title...";
if(preg_match("/\/en\/collection\//",$url))
{
  // Collection page: the object number and title are scraped from the page
  // source using the `objectNumber : "..."` / `objectTitle : "..."` patterns.
  $contents = file_get_contents($url);
  if($contents === false)
  {
    fwrite(STDERR, "\nCould not download ".$url."\n");
    exit(1);
  }
  if(!preg_match('/objectNumber : "([^"]*)"/',$contents,$matches))
  {
    fwrite(STDERR, "\nCould not find objectNumber in ".$url."\n");
    exit(1);
  }
  $id = $matches[1];
  // Fall back to the object number if the page carries no title.
  $title = preg_match('/objectTitle : "([^"]*)"/',$contents,$matches)
    ? $matches[1] : $id;
}else
{
  // Any other URL is rewritten into an api/search/browse/items request.
  // The trailing ",<number>" of the URL is used as the result offset.
  $offset = preg_replace("/^.*,([0-9]*)$/","\\1",$url);
  # extract id: the last path segment, up to the first comma
  $id = preg_replace("/^.*\//","",$url);
  $id = preg_replace("/,.*$/","",$id);
  $title_url = preg_replace("/search\/objecten\?/",
    "api/search/browse/items?offset=".$offset."&count=1&",$url);
  $title_url = preg_replace("/#\//", "&objectNumber=",$title_url);
  $title_url = preg_replace("/,[0-9]*$/", "",$title_url);
  $contents = file_get_contents($title_url);
  if($contents === false)
  {
    fwrite(STDERR, "\nCould not download ".$title_url."\n");
    exit(1);
  }
  $items = json_decode(prepareJSON($contents), true);
  // Fall back to the object number if the response carries no title.
  $title = isset($items["setItems"][0]["ObjectTitle"])
    ? $items["setItems"][0]["ObjectTitle"] : $id;
  // Prefix the title with the f.principalMaker.sort value from the URL.
  $title = preg_replace("/^.*f.principalMaker.sort=([^#]*)#.*$/","\\1",$url).
    "-".$title;
}
// Turn the title into a safe file name. The title chosen by whichever branch
// ran above is used here, so the search-URL title is no longer discarded.
$title = html_entity_decode($title, ENT_COMPAT, 'utf-8');
$ascii = iconv("utf-8","ascii//TRANSLIT",$title);
if($ascii !== false)
{
  $title = $ascii;
}
// "A-Za-z" rather than "A-z": the A-z range also spans [ \ ] ^ _ and `,
// which must not reach the shell command that uses this name later.
$title = preg_replace("/[^A-Za-z0-9]+/","-",$title);
$final = strtolower($title);
echo "\n";

// Fetch the tile description for object $id, download every tile of the
// level named "z0", stitch them into $final.jpg with ImageMagick's `montage`,
// then delete the downloaded tile files.
echo "Getting images...";
$contents = file_get_contents(
  "http://q42imageserver.appspot.com/api/getTilesInfo?object_id=".urlencode($id));
if($contents === false)
{
  fwrite(STDERR, "\nCould not download tile info for ".$id."\n");
  exit(1);
}

$info = json_decode(prepareJSON($contents), true);
$levels = isset($info["levels"]) ? $info["levels"] : array();

$files = array();
$tile_x = 0;
$tile_y = 0;
foreach($levels as $level)
{
  if($level["name"] == "z0")
  {
    $tiles = $level["tiles"];
    // Collect the x and y coordinate of every tile as sort keys.
    $xs = array();
    $ys = array();
    foreach($tiles as $key => $row)
    {
      $xs[$key] = $row['x'];
      $ys[$key] = $row['y'];
    }

    // Order the tiles by y, then by x, so that they are handed to montage
    // row by row.
    array_multisort($ys, SORT_ASC, $xs, SORT_ASC, $tiles);

    foreach($tiles as $tile)
    {
      // The coordinates come from a remote response; force them to integers
      // before they become part of a file name.
      $x = intval($tile["x"]);
      $y = intval($tile["y"]);
      // The grid size is the largest coordinate plus one in each direction.
      $tile_x = max($tile_x,$x+1);
      $tile_y = max($tile_y,$y+1);
      $img = "z0-$x-$y.jpg";
      echo "(".$x.",".$y.") ";
      $data = file_get_contents($tile["url"]);
      if($data === false)
      {
        // Do not stitch an incomplete image: remove what was downloaded.
        foreach($files as $file)
        {
          if(file_exists($file)) unlink($file);
        }
        fwrite(STDERR, "\nCould not download tile (".$x.",".$y.")\n");
        exit(1);
      }
      file_put_contents($img, $data);
      $files[] = $img;
    }
    break;
  }
}
echo "\n";
if(count($files) == 0)
{
  fwrite(STDERR, "No z0 tiles found for ".$id."\n");
  exit(1);
}

echo "Composing images...";
// Every argument is shell-escaped before being handed to montage.
$list = implode(" ", array_map('escapeshellarg', $files));
shell_exec("montage ".$list." -tile ".$tile_x."x".$tile_y.
  " -geometry +0+0 -quality 100 ".escapeshellarg($final.".jpg"));
echo "\n";
echo $final.".jpg\n";

echo "Clean up...";
foreach($files as $file)
{
  if(file_exists($file)) unlink($file);
}
echo "\n";
?>
Then you can call the script from the command line with something like:
php rijksmuseum.php "https://www.rijksmuseum.nl/en/collection/NG-2011-6-24"
Buried inside of that script is also a nice way to clean up strings for use as filenames:
// Decode HTML entities, transliterate to ASCII, collapse every run of
// characters other than ASCII letters and digits into one "-", then lowercase.
$title = html_entity_decode($matches[1], ENT_COMPAT, 'utf-8');
$title = iconv("utf-8","ascii//TRANSLIT",$title);
// "A-Za-z" rather than "A-z": the A-z range also spans [ \ ] ^ _ and `.
$title = preg_replace("/[^A-Za-z0-9]+/","-",$title);
$final = strtolower($title);