[Mapbender-commits] r9873 - trunk/mapbender/http/classes

svn_mapbender at osgeo.org svn_mapbender at osgeo.org
Tue Feb 20 03:27:27 PST 2018


Author: armin11
Date: 2018-02-20 03:27:27 -0800 (Tue, 20 Feb 2018)
New Revision: 9873

Modified:
   trunk/mapbender/http/classes/class_syncCkan.php
Log:
Fix - use paging for ckan synchronisation - needed for more than 1000 packages when using ckan package_search api (tested with ckan 2.7.x).

Modified: trunk/mapbender/http/classes/class_syncCkan.php
===================================================================
--- trunk/mapbender/http/classes/class_syncCkan.php	2018-02-19 17:17:05 UTC (rev 9872)
+++ trunk/mapbender/http/classes/class_syncCkan.php	2018-02-20 11:27:27 UTC (rev 9873)
@@ -125,7 +125,7 @@
         }
         return $array;
     }
-
+    //TODO: use paging when fetching the record list - the single request below is capped at rows=1000
     public function getRemoteCkanRecordList($orga_filter,$central_filter,$filter,$ckan_api_url,$ckan_api_version) {
         $ckanConnector = new connector($ckan_api_url.$ckan_api_version."/"."action/package_search?q=".$filter."&rows=1000"."&facet=true");
         $ckanConnector->set("timeOut", "3");
@@ -406,33 +406,49 @@
 			$queryObject->fq = $catalogue->central_ckan_filter." AND owner_org:".$organization["ckan_uuid"];
 			//$queryObject->fq = "transparency_category_de_rp:spatial_data AND owner_org:".$organization["ckan_uuid"];
                         $queryObject->facet = "true";
-			$queryObject->rows = "1000"; //TODO: maybe an problem somewhen
-                        $listOfFilteredData = $ckan->action_package_search(json_encode($queryObject));
-                        $countCkanMetadataArray = 0;
-                        $ckanMetadataArray = array();
-                        //echo "List of datasets in ckan instance:"."<br>";
-                        $ckanPackageNames = array();
-                        if ($listOfFilteredData->success == true) {
-			    //TODO - why only 10 records are given back when search?
-                            $syncListResultRemoteCkan->external_ckan[$numberOfCatalogue]->count_ckan_packages = $listOfFilteredData->result->count;
-                            //echo json_encode($listOfFilteredData)."<br>";
-                            //$e = new mb_exception("Number of results: ".$listOfFilteredData->result->count);
-                            foreach ($listOfFilteredData->result->results as $dataset) {
-                                $ckanMetadataArray[$countCkanMetadataArray]['id'] = $dataset->id;
-                                $ckanMetadataArray[$countCkanMetadataArray]['name'] = $dataset->name;
-                                $ckanMetadataArray[$countCkanMetadataArray]['changedate'] = $dataset->metadata_modified;
-                                if ($listAllMetadataInJson == true) {
-                                    $syncListResultRemoteCkan->external_ckan[$numberOfCatalogue]->ckan_packages[$countCkanMetadataArray]->id = $dataset->name;
-                                    $syncListResultRemoteCkan->external_ckan[$numberOfCatalogue]->ckan_packages[$countCkanMetadataArray]->date_time = $dataset->metadata_modified;
-                                }
-                                //echo $dataset->title." - ".$dataset->name." - ".$dataset->metadata_modified."<br>";
-                                $ckanPackageNames[] = $dataset->name;
-                                $countCkanMetadataArray++;
-				//$e = new mb_exception("ckan dataset number: ".$countCkanMetadataArray);
-                            }
-                        } else {
-                            $e = new mb_exception("classes/class_syncCkan.php: A problem while searching for datasets in ckan occured!");
-                        }				
+			//$e = new mb_exception("test");
+			$queryObject->rows = "1"; //a single row is enough - this first request is only used to read the total count
+			//first count number of packages
+			$listOfFilteredDataCount = $ckan->action_package_search(json_encode($queryObject));
+			$numberOfPackagesPerPage = 200;
+			if ($listOfFilteredDataCount->success == true) {
+				$numberOfPackages = $listOfFilteredDataCount->result->count; //read the count from the count request itself
+				$numberOfPages = (int)ceil($numberOfPackages / $numberOfPackagesPerPage); //round up, otherwise a partially filled last page is never requested
+			} else {
+				$numberOfPages = 1; //count unknown - fall back to requesting a single page
+			}
+			$queryObject->rows = (string)$numberOfPackagesPerPage;
+			//$e = new mb_exception("number of packages: ".$numberOfPackages);
+			$listOfFilteredDataArray = array();
+			for ($nP = 1; $nP <= $numberOfPages ; $nP++) {
+				//$e = new mb_exception("page: ".$nP);
+				$queryObject->start = ($nP - 1) * $numberOfPackagesPerPage;
+				$listOfFilteredDataArray[] = $ckan->action_package_search(json_encode($queryObject));
+			}
+ 			$countCkanMetadataArray = 0;
+			$ckanMetadataArray = array();
+ 			$ckanPackageNames = array();
+			$syncListResultRemoteCkan->external_ckan[$numberOfCatalogue]->count_ckan_packages = $numberOfPackages;
+			foreach ($listOfFilteredDataArray as $listOfFilteredData) {
+                        	if ($listOfFilteredData->success == true) {
+			    		//TODO - why only 10 records are given back when search?
+                            		foreach ($listOfFilteredData->result->results as $dataset) {
+                                		$ckanMetadataArray[$countCkanMetadataArray]['id'] = $dataset->id;
+                                		$ckanMetadataArray[$countCkanMetadataArray]['name'] = $dataset->name;
+                                		$ckanMetadataArray[$countCkanMetadataArray]['changedate'] = $dataset->metadata_modified;
+                                		if ($listAllMetadataInJson == true) {
+                                    			$syncListResultRemoteCkan->external_ckan[$numberOfCatalogue]->ckan_packages[$countCkanMetadataArray]->id = $dataset->name;
+                                    			$syncListResultRemoteCkan->external_ckan[$numberOfCatalogue]->ckan_packages[$countCkanMetadataArray]->date_time = $dataset->metadata_modified;
+                                		}
+                                		//echo $dataset->title." - ".$dataset->name." - ".$dataset->metadata_modified."<br>";
+                                		$ckanPackageNames[] = $dataset->name;
+                                		$countCkanMetadataArray++;
+						//$e = new mb_exception("ckan dataset number: ".$countCkanMetadataArray);
+                            		}
+                        	} else {
+                            		$e = new mb_exception("classes/class_syncCkan.php: A problem while searching for datasets in ckan occured!");
+                        	}	
+			}			
                         // only list http://localhost:5000/api/3/action/package_list?q=owner_org:81476cf5-6c52-4e99-8b9f-6150d63fcb32	
                         //pull all relevant information from mapbender database - first pull the resources which are owned by the corresponding group!
                         //only use metadata for which real licenses are defined !!!!!! - what should be done with the other metadata?- DO a left join!!!
@@ -568,34 +584,52 @@
                         //$queryObject->fq = STANDARD_CKAN_FILTER." AND owner_org:".$organization["ckan_uuid"];
 			$queryObject->fq = $catalogue->ckan_filter." AND owner_org:".$organization["ckan_uuid"];
 			//$queryObject->fq = "transparency_category_de_rp:spatial_data AND owner_org:".$organization["ckan_uuid"];
-                        $queryObject->facet = "true";
-			$queryObject->rows = "1000"; //TODO: maybe an problem somewhen
-                        $listOfFilteredData = $ckan->action_package_search(json_encode($queryObject));
-                        $countCkanMetadataArray = 0;
-                        $ckanMetadataArray = array();
-                        //echo "List of datasets in ckan instance:"."<br>";
-                        $ckanPackageNames = array();
-                        if ($listOfFilteredData->success == true) {
-			    //TODO - why only 10 records are given back when search?
-                            $syncListResultCsw->external_csw[$numberOfCatalogue]->count_ckan_packages = $listOfFilteredData->result->count;
-                            //echo json_encode($listOfFilteredData)."<br>";
-                            //$e = new mb_exception("Number of results: ".$listOfFilteredData->result->count);
-                            foreach ($listOfFilteredData->result->results as $dataset) {
-                                $ckanMetadataArray[$countCkanMetadataArray]['id'] = $dataset->id;
-                                $ckanMetadataArray[$countCkanMetadataArray]['name'] = $dataset->name;
-                                $ckanMetadataArray[$countCkanMetadataArray]['changedate'] = $dataset->metadata_modified;
-                                if ($listAllMetadataInJson == true) {
-                                    $syncListResultCsw->external_csw[$numberOfCatalogue]->ckan_packages[$countCkanMetadataArray]->id = $dataset->name;
-                                    $syncListResultCsw->external_csw[$numberOfCatalogue]->ckan_packages[$countCkanMetadataArray]->date_time = $dataset->metadata_modified;
-                                }
-                                //echo $dataset->title." - ".$dataset->name." - ".$dataset->metadata_modified."<br>";
-                                $ckanPackageNames[] = $dataset->name;
-                                $countCkanMetadataArray++;
-				//$e = new mb_exception("ckan dataset number: ".$countCkanMetadataArray);
-                            }
-                        } else {
-                            $e = new mb_exception("classes/class_syncCkan.php: A problem while searching for datasets in ckan occured!");
-                        }				
+                       $queryObject->facet = "true";
+			$queryObject->rows = "1"; //a single row is enough - this first request is only used to read the total count
+			//first count number of packages
+			$listOfFilteredDataCount = $ckan->action_package_search(json_encode($queryObject));
+			$numberOfPackagesPerPage = 200;
+			if ($listOfFilteredDataCount->success == true) {
+				$numberOfPackages = $listOfFilteredDataCount->result->count;
+				$numberOfPages = (int)ceil($numberOfPackages / $numberOfPackagesPerPage); //round up, otherwise a partially filled last page is never requested
+			} else {
+				$numberOfPages = 1; //count unknown - fall back to requesting a single page
+			}
+			$queryObject->rows = (string)$numberOfPackagesPerPage;
+			//$e = new mb_exception("number of packages: ".$numberOfPackages);
+			$listOfFilteredDataArray = array();
+			for ($nP = 1; $nP <= $numberOfPages ; $nP++) {
+				//$e = new mb_exception("page: ".$nP);
+				$queryObject->start = ($nP - 1) * $numberOfPackagesPerPage;
+				$listOfFilteredDataArray[] = $ckan->action_package_search(json_encode($queryObject));
+			}
+			$countCkanMetadataArray = 0;
+			$ckanMetadataArray = array();
+ 			$ckanPackageNames = array();
+			$syncListResultCsw->external_csw[$numberOfCatalogue]->count_ckan_packages = $numberOfPackages;
+			foreach ($listOfFilteredDataArray as $listOfFilteredData) {
+                        	if ($listOfFilteredData->success == true) {
+			    		//TODO - why only 10 records are given back when search?
+                            		//$syncListResultCsw->external_ckan[$numberOfCatalogue]->count_ckan_packages = $numberOfPackages;
+                            		//echo json_encode($listOfFilteredData)."<br>";
+                            		//$e = new mb_exception("Number of results: ".$listOfFilteredData->result->count);
+                            		foreach ($listOfFilteredData->result->results as $dataset) {
+                                		$ckanMetadataArray[$countCkanMetadataArray]['id'] = $dataset->id;
+                                		$ckanMetadataArray[$countCkanMetadataArray]['name'] = $dataset->name;
+                                		$ckanMetadataArray[$countCkanMetadataArray]['changedate'] = $dataset->metadata_modified;
+                                		if ($listAllMetadataInJson == true) {
+                                    			$syncListResultCsw->external_csw[$numberOfCatalogue]->ckan_packages[$countCkanMetadataArray]->id = $dataset->name;
+                                    			$syncListResultCsw->external_csw[$numberOfCatalogue]->ckan_packages[$countCkanMetadataArray]->date_time = $dataset->metadata_modified;
+                                		}
+                                		//echo $dataset->title." - ".$dataset->name." - ".$dataset->metadata_modified."<br>";
+                                		$ckanPackageNames[] = $dataset->name;
+                                		$countCkanMetadataArray++;
+						//$e = new mb_exception("ckan dataset number: ".$countCkanMetadataArray);
+                            		}
+                        	} else {
+                            		$e = new mb_exception("classes/class_syncCkan.php: A problem while searching for datasets in ckan occured!");
+                        	}	
+			}			
                         // only list http://localhost:5000/api/3/action/package_list?q=owner_org:81476cf5-6c52-4e99-8b9f-6150d63fcb32	
                         //pull all relevant information from mapbender database - first pull the resources which are owned by the corresponding group!
                         //only use metadata for which real licenses are defined !!!!!! - what should be done with the other metadata?- DO a left join!!!
@@ -719,33 +753,53 @@
                         //TODO: define standard category filter in ckan.conf for spatial data!
                         $queryObject->fq = STANDARD_CKAN_FILTER." AND owner_org:".$organization["ckan_uuid"];
 			//
-			$queryObject->rows = 1000;
 			//$queryObject->fq = "transparency_category_de_rp:spatial_data AND owner_org:".$organization["ckan_uuid"];
                         $queryObject->facet = "true";
-                        $listOfFilteredData = $ckan->action_package_search(json_encode($queryObject));
-                        $countCkanMetadataArray = 0;
-                        $ckanMetadataArray = array();
-                        //echo "List of datasets in ckan instance:"."<br>";
-                        $ckanPackageNames = array();
-                        if ($listOfFilteredData->success == true) {
-                            $syncListResult->geoportal_organization[$numberGeoportalOrga]->count_ckan_packages = $listOfFilteredData->result->count;
-                            //echo json_encode($listOfFilteredData)."<br>";
-                            //$e = new mb_exception("Number of results: ".$listOfFilteredData->result->count);
-                            foreach ($listOfFilteredData->result->results as $dataset) {
-                                $ckanMetadataArray[$countCkanMetadataArray]['id'] = $dataset->id;
-                                $ckanMetadataArray[$countCkanMetadataArray]['name'] = $dataset->name;
-                                $ckanMetadataArray[$countCkanMetadataArray]['changedate'] = $dataset->metadata_modified;
-                                if ($listAllMetadataInJson == true) {
-                                    $syncListResult->geoportal_organization[$numberGeoportalOrga]->ckan_packages[$countCkanMetadataArray]->id = $dataset->name;
-                                    $syncListResult->geoportal_organization[$numberGeoportalOrga]->ckan_packages[$countCkanMetadataArray]->date_time = $dataset->metadata_modified;
-                                }
-                                //echo $dataset->title." - ".$dataset->name." - ".$dataset->metadata_modified."<br>";
-                                $ckanPackageNames[] = $dataset->name;
-                                $countCkanMetadataArray++;
-                            }
-                        } else {
-                            $e = new mb_exception("classes/class_syncCkan.php: A problem while searching for datasets in ckan occured!");
-                        }				
+			$queryObject->rows = "1"; //a single row is enough - this first request is only used to read the total count
+			//first count number of packages
+			$listOfFilteredDataCount = $ckan->action_package_search(json_encode($queryObject));
+			$numberOfPackagesPerPage = 200;
+			if ($listOfFilteredDataCount->success == true) {
+				$numberOfPackages = $listOfFilteredDataCount->result->count;
+				$numberOfPages = (int)ceil($numberOfPackages / $numberOfPackagesPerPage); //round up, otherwise a partially filled last page is never requested
+			} else {
+				$numberOfPages = 1; //count unknown - fall back to requesting a single page
+			}
+			$queryObject->rows = (string)$numberOfPackagesPerPage;
+			//$e = new mb_exception("number of packages: ".$numberOfPackages);
+			$listOfFilteredDataArray = array();
+			for ($nP = 1; $nP <= $numberOfPages ; $nP++) {
+				//$e = new mb_exception("page: ".$nP);
+				$queryObject->start = ($nP - 1) * $numberOfPackagesPerPage;
+				$listOfFilteredDataArray[] = $ckan->action_package_search(json_encode($queryObject));
+			}
+			$countCkanMetadataArray = 0;
+			$ckanMetadataArray = array();
+ 			$ckanPackageNames = array();
+			$syncListResult->geoportal_organization[$numberGeoportalOrga]->count_ckan_packages = $numberOfPackages;
+			foreach ($listOfFilteredDataArray as $listOfFilteredData) {
+                        	if ($listOfFilteredData->success == true) {
+			    		//TODO - why only 10 records are given back when search?
+                            		//$syncListResult->external_ckan[$numberOfCatalogue]->count_ckan_packages = $numberOfPackages;
+                            		//echo json_encode($listOfFilteredData)."<br>";
+                            		//$e = new mb_exception("Number of results: ".$listOfFilteredData->result->count);
+                            		foreach ($listOfFilteredData->result->results as $dataset) {
+                                		$ckanMetadataArray[$countCkanMetadataArray]['id'] = $dataset->id;
+                                		$ckanMetadataArray[$countCkanMetadataArray]['name'] = $dataset->name;
+                                		$ckanMetadataArray[$countCkanMetadataArray]['changedate'] = $dataset->metadata_modified;
+                                		if ($listAllMetadataInJson == true) {
+                                    			$syncListResult->geoportal_organization[$numberGeoportalOrga]->ckan_packages[$countCkanMetadataArray]->id = $dataset->name;
+                                    			$syncListResult->geoportal_organization[$numberGeoportalOrga]->ckan_packages[$countCkanMetadataArray]->date_time = $dataset->metadata_modified;
+                                		}
+                                		//echo $dataset->title." - ".$dataset->name." - ".$dataset->metadata_modified."<br>";
+                                		$ckanPackageNames[] = $dataset->name;
+                                		$countCkanMetadataArray++;
+						//$e = new mb_exception("ckan dataset number: ".$countCkanMetadataArray);
+                            		}
+                        	} else {
+                            		$e = new mb_exception("classes/class_syncCkan.php: A problem while searching for datasets in ckan occured!");
+                        	}	
+			}			
                         //only list http://localhost:5000/api/3/action/package_list?q=owner_org:81476cf5-6c52-4e99-8b9f-6150d63fcb32	
                         //pull all relevant information from mapbender database - first pull the resources which are owned by the corresponding group!
                         //only use metadata for which real licenses are defined !!!!!! - what should be done with the other metadata?- DO a left join!!!
@@ -753,11 +807,11 @@
                         if ($organization['is_primary_group']) {
 			    $sql = "SELECT metadata_id as ressource_id, 'metadata' as ressource_type, uuid::varchar, title, lastchanged, fkey_termsofuse_id, f_get_coupled_resources(metadata_id) from mb_metadata LEFT OUTER JOIN md_termsofuse ON mb_metadata.metadata_id = md_termsofuse.fkey_metadata_id WHERE fkey_mb_user_id = $1 AND (fkey_mb_group_id is null OR fkey_mb_group_id = 0) AND export2csw IS true AND md_termsofuse.fkey_termsofuse_id in (select termsofuse_id from termsofuse where isopen = 1) ";
 			    $sql .= " UNION SELECT layer_metadata.ressource_id, layer_metadata.ressource_type, layer_metadata.uuid::varchar, layer_metadata.title, to_timestamp(layer_metadata.lastchanged), wms_termsofuse.fkey_termsofuse_id, '{\"coupledResources\":{\"layerIds\":[' || layer_metadata.ressource_id || '],\"featuretypeIds\":[]}}' as f_get_coupled_resources FROM ";
-			    $sql .= "(SELECT layer_id as ressource_id, 'layer' as ressource_type, layer.uuid::varchar, layer_title as title, wms.wms_timestamp as lastchanged, layer.fkey_wms_id FROM layer INNER JOIN wms on layer.fkey_wms_id = wms.wms_id WHERE wms_owner = $2 AND (fkey_mb_group_id is null OR fkey_mb_group_id = 0 OR fkey_mb_group_id = $3)";
+			    $sql .= "(SELECT layer_id as ressource_id, 'layer' as ressource_type, layer.uuid::varchar, layer_title as title, wms.wms_timestamp as lastchanged, layer.fkey_wms_id FROM layer INNER JOIN wms on layer.fkey_wms_id = wms.wms_id WHERE wms_owner = $2 AND (fkey_mb_group_id is null OR fkey_mb_group_id = 0)";
 			    $sql .= " AND layer.export2csw IS true AND layer.layer_searchable = 1 AND layer_id NOT IN (SELECT DISTINCT fkey_layer_id FROM ows_relation_metadata WHERE fkey_layer_id IS NOT NULL)) AS layer_metadata INNER JOIN wms_termsofuse ON layer_metadata.fkey_wms_id = wms_termsofuse.fkey_wms_id AND fkey_termsofuse_id IS NOT NULL AND wms_termsofuse.fkey_termsofuse_id IN (SELECT termsofuse_id FROM termsofuse WHERE isopen = 1)";
 			    //$e = new mb_exception("class_syncCkan.php: sql: ".$sql);
-                            $v = array($this->mapbenderUserId, $this->mapbenderUserId, $syncListResult->geoportal_organization[$numberGeoportalOrga]->id);
-                            $t = array('i','i','i');
+                            $v = array($this->mapbenderUserId, $this->mapbenderUserId);
+                            $t = array('i','i');
                         } else {
 			   $sql = "SELECT metadata_id as ressource_id, 'metadata' as ressource_type, uuid::varchar, title, lastchanged, fkey_termsofuse_id, f_get_coupled_resources(metadata_id) from mb_metadata LEFT OUTER JOIN md_termsofuse ON mb_metadata.metadata_id = md_termsofuse.fkey_metadata_id WHERE fkey_mb_group_id = $1 AND export2csw IS true AND md_termsofuse.fkey_termsofuse_id in (select termsofuse_id from termsofuse where isopen = 1)";
 			    $sql .= " UNION SELECT layer_metadata.ressource_id, layer_metadata.ressource_type, layer_metadata.uuid::varchar, layer_metadata.title, to_timestamp(layer_metadata.lastchanged), wms_termsofuse.fkey_termsofuse_id, '{\"coupledResources\":{\"layerIds\":[' || layer_metadata.ressource_id || '],\"featuretypeIds\":[]}}' ";



More information about the Mapbender_commits mailing list